Java Code Examples for storm.kafka.ZkHosts

The following examples show how to use storm.kafka.ZkHosts. They are extracted from open source projects.
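Before the individual examples, here is a minimal sketch of the pattern nearly all of them follow (the ZooKeeper address, topic name, and consumer ID are placeholders, and the imports assume the pre-Apache-namespace storm-kafka API these projects use): ZkHosts points the spout at the ZooKeeper ensemble the Kafka brokers register with, SpoutConfig adds the topic plus the ZooKeeper root path and ID under which consumer offsets are stored, and the resulting KafkaSpout is wired into a topology.

import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;
import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;

public class ZkHostsSketch {
    public static void main(String[] args) {
        // ZooKeeper ensemble the Kafka brokers register with (placeholder address)
        BrokerHosts hosts = new ZkHosts("localhost:2181");

        // Topic to consume, ZooKeeper root path and consumer ID for offset storage (placeholders)
        SpoutConfig spoutConfig = new SpoutConfig(hosts, "my-topic", "/my-topic", "my-consumer-id");
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

        // Wire the spout into a topology
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("kafka-spout", new KafkaSpout(spoutConfig), 1);
    }
}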
Example 1
Project: RealEstate-Streaming   File: PhoenixTest.java
private SpoutConfig constructKafkaSpoutConf() {
    // BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
    BrokerHosts hosts = new ZkHosts("localhost:2181");
    /*
    String topic = topologyConfig.getProperty("kafka.topic");
    String zkRoot = topologyConfig.getProperty("kafka.zkRoot");
    String consumerGroupId = topologyConfig.getProperty("kafka.consumer.group.id");
    */
    String topic = "addresses";
    String zkRoot = "";
    String consumerGroupId = "group1";

    SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, consumerGroupId);
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

    return spoutConfig;
}
 
Example 2
Project: storm-topology-examples   File: ConfigureKafkaSpout.java
public static void configureKafkaSpout(TopologyBuilder builder, String zkHostString, String kafkaTopic, 
                                       String kafkaStartOffset, int parallelismHint, String spoutName,
                                       String spoutScheme) {

    LOG.info("KAFKASPOUT: Configuring the KafkaSpout");

    // Configure the KafkaSpout
    SpoutConfig spoutConfig = new SpoutConfig(new ZkHosts(zkHostString),
            kafkaTopic,      // Kafka topic to read from
            "/" + kafkaTopic, // Root path in Zookeeper for the spout to store consumer offsets
            UUID.randomUUID().toString());  // ID for storing consumer offsets in Zookeeper
    try {
        spoutConfig.scheme = new SchemeAsMultiScheme(getSchemeFromClassName(spoutScheme));
    } catch (Exception e) {
        LOG.error("ERROR: Unable to create instance of scheme: " + spoutScheme, e);
    }
    setKafkaOffset(spoutConfig, kafkaStartOffset);
    
    KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

    // Add the spout and bolt to the topology
    builder.setSpout(spoutName, kafkaSpout, parallelismHint);

}
 
Example 3
Project: storm-kafka-hdfs-starter   File: ConfigureKafkaSpout.java
public static void configureKafkaSpout(TopologyBuilder builder, String zkHostString, String kafkaTopic, String kafkaStartOffset) {

        // Configure the KafkaSpout
        SpoutConfig spoutConfig = new SpoutConfig(new ZkHosts(zkHostString),
                kafkaTopic,      // Kafka topic to read from
                "/" + kafkaTopic, // Root path in Zookeeper for the spout to store consumer offsets
                UUID.randomUUID().toString());  // ID for storing consumer offsets in Zookeeper
        //spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        spoutConfig.scheme = new SchemeAsMultiScheme(new JsonScheme());

        // Allow for passing in an offset time
        // startOffsetTime has a bug that ignores the special -2 value
        if ("-2".equals(kafkaStartOffset)) { // compare strings with equals(), not ==
            spoutConfig.forceFromStart = true;
        } else if (kafkaStartOffset != null) {
            spoutConfig.startOffsetTime = Long.parseLong(kafkaStartOffset);
        }
        KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

        // Add the spout and bolt to the topology
        builder.setSpout("kafkaspout", kafkaSpout, 1);

    }
 
Example 4
Project: opensoc-streaming   File: TopologyRunner.java
private boolean initializeKafkaSpout(String name) {
	try {

		BrokerHosts zk = new ZkHosts(config.getString("kafka.zk"));
		String input_topic = config.getString("spout.kafka.topic");
		SpoutConfig kafkaConfig = new SpoutConfig(zk, input_topic, "",
				input_topic);
		kafkaConfig.scheme = new SchemeAsMultiScheme(new RawScheme());
		kafkaConfig.forceFromStart = true;
		kafkaConfig.startOffsetTime = -1;

		builder.setSpout(name, new KafkaSpout(kafkaConfig),
				config.getInt("spout.kafka.parallelism.hint")).setNumTasks(
				config.getInt("spout.kafka.num.tasks"));

	} catch (Exception e) {
		e.printStackTrace();
		System.exit(1); // signal failure with a non-zero exit status
	}

	return true;
}
 
Example 5
Project: storm-sample   File: TruckEventProcessorKafkaTopology.java
/**
 * Constructs the Kafka spout configuration from the topology properties.
 * @return the configured SpoutConfig
 */
private SpoutConfig constructKafkaSpoutConf() {
	BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
	String topic = topologyConfig.getProperty("kafka.topic");
	String zkRoot = topologyConfig.getProperty("kafka.zkRoot");
	String consumerGroupId = topologyConfig.getProperty("kafka.consumer.group.id");
	
	SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, consumerGroupId);
	
	/* Custom TruckScheme that takes a Kafka message for a single truckEvent
	 * and emits a 2-tuple consisting of truckId and truckEvent. This id is
	 * required to do a fieldsGrouping so that all events for a given truck are sent to the same bolt */
	spoutConfig.scheme = new SchemeAsMultiScheme(new TruckScheme2());
	
	return spoutConfig;
}
 
Example 6
Project: kafka-storm-hive   File: KafkaStormTopology.java
public static void main(String[] args) {
    TopologyBuilder builder = new TopologyBuilder();

    SpoutConfig spoutConf = new SpoutConfig(new ZkHosts("localhost:2181", "/brokers"), "test", "/kafkastorm", "KafkaSpout");
    spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
    spoutConf.forceFromStart = true;

    builder.setSpout("KafkaSpout", new KafkaSpout(spoutConf), 3);
    builder.setBolt("KafkaBolt", new PrinterBolt(), 3).shuffleGrouping("KafkaSpout");

    Config conf = new Config();
    // conf.setDebug(true);

    LocalCluster cluster = new LocalCluster();
    cluster.submitTopology("kafka-test", conf, builder.createTopology());

    Utils.sleep(60000);
    cluster.shutdown();
}
 
Example 7
Project: kafka-storm-hive   File: HDFSSequenceTopology.java
public static StormTopology buildTopology(String hdfsUrl) {
    TridentKafkaConfig tridentKafkaConfig = new TridentKafkaConfig(new ZkHosts(ZKHOST, "/brokers"), KAFKA_TOPIC);
    tridentKafkaConfig.scheme = new SchemeAsMultiScheme(new RawScheme());
    tridentKafkaConfig.startOffsetTime = -1; // equivalent to forceStartOffsetTime(-1): read the latest messages from Kafka

    TransactionalTridentKafkaSpout tridentKafkaSpout = new TransactionalTridentKafkaSpout(tridentKafkaConfig);

    TridentTopology topology = new TridentTopology();

    Stream stream = topology.newStream("stream", tridentKafkaSpout);

    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath(HDFS_OUT_PATH).withPrefix("trident").withExtension(".txt");
    FileRotationPolicy rotationPolicy = new FileSizeCountRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB, 10);
    HdfsState.Options seqOpts = new HdfsState.HdfsFileOptions().withFileNameFormat(fileNameFormat)
            .withRecordFormat(new DelimitedRecordFormat().withFieldDelimiter("|").withFields(new Fields("json")))
            .withRotationPolicy(rotationPolicy).withFsUrl(hdfsUrl)
            // .addRotationAction(new MoveFileAction().toDestination(HDFS_ROTATE_PATH));
            // .addRotationAction(new AddSuffixFileAction().withSuffix("-processed"));
            .addRotationAction(new MD5FileAction());
    StateFactory factory = new HdfsStateFactory().withOptions(seqOpts);

    stream.each(new Fields("bytes"), new JacksonJsonParser(), new Fields("json")).partitionPersist(factory, new Fields("json"),
            new HdfsUpdater(), new Fields());

    return topology.build();
}
 
Example 8
Project: trident-tutorial   File: TopHashtagByFollowerClass.java
public static void main(String[] args) throws Exception {
    Config conf = new Config();

    if (args.length == 2) {
        // Ready & submit the topology
        String name = args[0];
        BrokerHosts hosts = new ZkHosts(args[1]);
        TransactionalTridentKafkaSpout kafkaSpout = TestUtils.testTweetSpout(hosts);

        StormSubmitter.submitTopology(name, conf, buildTopology(kafkaSpout));

    } else {
        System.err.println("Usage: <topologyName> <zookeeperHost>");
    }

}
 
Example 9
Project: trident-tutorial   File: GlobalTop20Hashtags.java
public static void main(String[] args) throws Exception {
    Config conf = new Config();

    if (args.length == 2) {
        // Ready & submit the topology
        String name = args[0];
        BrokerHosts hosts = new ZkHosts(args[1]);
        TransactionalTridentKafkaSpout kafkaSpout = TestUtils.testTweetSpout(hosts);

        StormSubmitter.submitTopology(name, conf, buildTopology(kafkaSpout));

    } else {
        System.err.println("Usage: <topologyName> <zookeeperHost>");
    }

}
 
Example 10
Project: trident-tutorial   File: JoinExample.java
public static void main(String[] args) throws Exception {
    Config conf = new Config();

    if (args.length == 2) {
        // Ready & submit the topology
        String name = args[0];
        BrokerHosts hosts = new ZkHosts(args[1]);
        TransactionalTridentKafkaSpout kafkaSpout = TestUtils.testTweetSpout(hosts);

        StormSubmitter.submitTopology(name, conf, buildTopology(kafkaSpout));

    } else {
        System.err.println("Usage: <topologyName> <zookeeperHost>");
    }

}
 
Example 11
Project: trident-tutorial   File: RealTimeTextSearch.java
public static void main(String[] args) throws Exception {
    Config conf = new Config();


    if (args.length == 2) {
        // Ready & submit the topology
        String name = args[0];
        BrokerHosts hosts = new ZkHosts(args[1]);
        TransactionalTridentKafkaSpout kafkaSpout = TestUtils.testTweetSpout(hosts);

        StormSubmitter.submitTopology(name, conf, buildTopology(kafkaSpout));

    } else {
        System.err.println("Usage: <topologyName> <zookeeperHost>");
    }

}
 
Example 12
Project: trident-tutorial   File: TopHashtagByCountry.java
public static void main(String[] args) throws Exception {
    Config conf = new Config();


    if (args.length == 2) {
        // Ready & submit the topology
        String name = args[0];
        BrokerHosts hosts = new ZkHosts(args[1]);
        TransactionalTridentKafkaSpout kafkaSpout = TestUtils.testTweetSpout(hosts);

        StormSubmitter.submitTopology(name, conf, buildTopology(kafkaSpout));

    } else {
        System.err.println("Usage: <topologyName> <zookeeperHost>");
    }

}
 
Example 13
Project: trident-tutorial   File: TopHashtagFollowerCountGrouping.java
public static void main(String[] args) throws Exception {
    Config conf = new Config();
    conf.setNumWorkers(6);

    if (args.length == 2) {
        // Ready & submit the topology
        String name = args[0];
        BrokerHosts hosts = new ZkHosts(args[1]);
        TransactionalTridentKafkaSpout kafkaSpout = TestUtils.testTweetSpout(hosts);

        StormSubmitter.submitTopology(name, conf, buildTopology(kafkaSpout));

    } else {
        System.err.println("Usage: <topologyName> <zookeeperHost>");
    }

}
 
Example 14
Project: openbus   File: kafkaSpoutTest.java
public static void main(String[] args) {

	Config conf = new Config();
	// conf.put(Config.TOPOLOGY_DEBUG, true);
	conf.put(StormElasticSearchConstants.ES_CLUSTER_NAME, "elasticsearch");
	conf.put(StormElasticSearchConstants.ES_HOST, "localhost");
	conf.put(StormElasticSearchConstants.ES_PORT, 9300);

	ZkHosts zooHosts = new ZkHosts("localhost:50003");
	TopologyBuilder builder = new TopologyBuilder();
	SpoutConfig spoutConfig = new SpoutConfig(zooHosts, "test", "", "STORM-ID");

	// Without an explicit scheme the spout emits the raw byte payload; uncomment for plain strings:
	// spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	KafkaSpout spout1 = new KafkaSpout(spoutConfig);
	builder.setSpout("source", spout1, 1);
	builder.setBolt("echo", new EchoBolt(), 1).shuffleGrouping("source");

	LocalCluster cluster = new LocalCluster();
	cluster.submitTopology("basic_primitives", conf, builder.createTopology());
}
 
Example 15
Project: rb-bi   File: ZkBrokerReader.java
public ZkBrokerReader(Map conf, String topic, ZkHosts hosts) {
	try {
		reader = new DynamicBrokersReader(conf, hosts.brokerZkStr, hosts.brokerZkPath, topic);
		cachedBrokers = reader.getBrokerInfo();
		lastRefreshTimeMs = System.currentTimeMillis();
		refreshMillis = hosts.refreshFreqSecs * 1000L;
	} catch (java.net.SocketTimeoutException e) {
		LOG.warn("Failed to update brokers", e);
	}

}
 
Example 16
Project: rb-bi   File: TridentKafkaSpout.java
/**
 * Constructor
 *
 * @param config Config file to read properties from
 * @param section Section of the kafka config file to read properties from.
 */
public TridentKafkaSpout(ConfigData config, String section) {
    _kafkaConfig = new TridentKafkaConfig(new ZkHosts(config.getZkHost()), config.getTopic(section), "stormKafka");
    _kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    _kafkaConfig.bufferSizeBytes = config.getFetchSizeKafka();
    _kafkaConfig.fetchSizeBytes = config.getFetchSizeKafka();
    _kafkaConfig.forceFromStart = false;
}
 
Example 17
Project: rb-bi   File: TridentKafkaSpoutNmsp.java
/**
 * Constructor
 *
 * @param config Config file to read properties from
 * @param section Section of the kafka config file to read properties from.
 */
public TridentKafkaSpoutNmsp(ConfigData config, String section) {
    _kafkaConfig = new TridentKafkaConfig(new ZkHosts(config.getZkHost()), config.getTopic(section), "stormKafka");
    _kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    _kafkaConfig.bufferSizeBytes = config.getFetchSizeKafkaNmsp();
    _kafkaConfig.fetchSizeBytes = config.getFetchSizeKafkaNmsp();
    _kafkaConfig.forceFromStart = false;
}
 
Example 18
Project: rb-bi   File: TridentKafkaSpoutLocation.java
/**
 * Constructor
 *
 * @param config Config file to read properties from
 * @param section Section of the kafka config file to read properties from.
 */
public TridentKafkaSpoutLocation(ConfigData config, String section) {
    _kafkaConfig = new TridentKafkaConfig(new ZkHosts(config.getZkHost()), config.getTopic(section), "stormKafka");
    _kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    _kafkaConfig.bufferSizeBytes = config.getFetchSizeKafkaLocation();
    _kafkaConfig.fetchSizeBytes = config.getFetchSizeKafkaLocation();
    _kafkaConfig.forceFromStart = false;
}
 
Example 19
Project: storm-kafka-examples   File: CounterTopology.java
/**
 * @param args
 * http://www.programcreek.com/java-api-examples/index.php?api=storm.kafka.KafkaSpout
 */
public static void main(String[] args) {
	try{
		// Set up the spout and assign its parallelism; this controls the number of threads this component gets across the cluster
		String zkhost = "wxb-1:2181,wxb-2:2181,wxb-3:2181";
		String topic = "order";
		String groupId = "id";
		int spoutNum = 3;
		int boltNum = 1;
		ZkHosts zkHosts = new ZkHosts(zkhost); // the ZooKeeper ensemble that Kafka uses
		SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic, "/order", groupId);  // create /order /id
		spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
		KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", kafkaSpout, spoutNum);
		builder.setBolt("check", new CheckOrderBolt(), boltNum).shuffleGrouping("spout");
        builder.setBolt("counter", new CounterBolt(),boltNum).shuffleGrouping("check");

        Config config = new Config();
        config.setDebug(true);
        
        if(args!=null && args.length > 0) {
            config.setNumWorkers(2);
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());
        } else {        
            config.setMaxTaskParallelism(2);

            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Wordcount-Topology", config, builder.createTopology());

            Thread.sleep(500000);

            cluster.shutdown();
        }
	}catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example 20
Project: RealEstate-Streaming   File: KafkaPhoenixTopology.java
private SpoutConfig constructKafkaSpoutConf() {
    // BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
    BrokerHosts hosts = new ZkHosts("localhost:2181");

    String topic = "properties";
    String zkRoot = "";
    String consumerGroupId = "group1";

    SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, consumerGroupId);
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

    return spoutConfig;
}
 
Example 21
Project: storm-kafka-hdfs-example   File: KafkaSpoutConfigBuilder.java
public KafkaSpout getKafkaSpout() {

        LOG.info("KAFKASPOUT: Configuring the Kafka Spout");

        // Create the initial spoutConfig
        SpoutConfig spoutConfig = new SpoutConfig(new ZkHosts(zookeeperConnectionString),
                kafkaTopic,      // Kafka topic to read from
                "/" + kafkaTopic, // Root path in Zookeeper for the spout to store consumer offsets
                UUID.randomUUID().toString());  // ID for storing consumer offsets in Zookeeper

        // Set the scheme
        try {
            spoutConfig.scheme = new SchemeAsMultiScheme(getSchemeFromClassName(spoutSchemeClass));
        } catch (Exception e) {
            LOG.error("ERROR: Unable to create instance of scheme: " + spoutSchemeClass, e);
        }

        // Set the offset
        setKafkaOffset(spoutConfig, kafkaStartOffset);

        // Create the kafkaSpout
        return new KafkaSpout(spoutConfig);

    }
 
Example 22
Project: iot-masterclass   File: TruckEventProcessorKafkaTopology.java
private SpoutConfig constructKafkaSpoutConf() {
  BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
  String topic = topologyConfig.getProperty("kafka.topic");
  String zkRoot = topologyConfig.getProperty("kafka.zkRoot");
  String consumerGroupId = topologyConfig.getProperty("kafka.consumer.group.id");

  SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, consumerGroupId);

/* Custom TruckScheme that takes a Kafka message for a single truckEvent
 * and emits a 2-tuple consisting of truckId and truckEvent. This id is
 * required to do a fieldsGrouping so that all events for a given truck are sent to the same bolt */
  spoutConfig.scheme = new SchemeAsMultiScheme(new TruckScheme2());

  return spoutConfig;
}
 
Example 23
Project: iot-lab   File: TruckEventProcessorKafkaTopology.java
private SpoutConfig constructKafkaSpoutConf() {
  BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
  String topic = topologyConfig.getProperty("kafka.topic");
  String zkRoot = topologyConfig.getProperty("kafka.zkRoot");
  String consumerGroupId = topologyConfig.getProperty("kafka.consumer.group.id");

  SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, consumerGroupId);

/* Custom TruckScheme that takes a Kafka message for a single truckEvent
 * and emits a 2-tuple consisting of truckId and truckEvent. This id is
 * required to do a fieldsGrouping so that all events for a given truck are sent to the same bolt */
  spoutConfig.scheme = new SchemeAsMultiScheme(new TruckScheme2());

  return spoutConfig;
}
 
Example 24
Project: Big-Data-tutorial   File: FlightTopology.java
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {

		String zkIp = "localhost";

		String nimbusHost = "sandbox.hortonworks.com";

		String zookeeperHost = zkIp +":2181";

		ZkHosts zkHosts = new ZkHosts(zookeeperHost);
		List<String> zkServers = new ArrayList<String>();
		zkServers.add(zkIp);
		SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, "spertus-flight-events", "/spertus-flights-events","flight_id");
		kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
		kafkaConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
		kafkaConfig.zkServers = zkServers;
		kafkaConfig.zkRoot = "/spertus-flight-events";
		kafkaConfig.zkPort = 2181;
		kafkaConfig.forceFromStart = true;
		KafkaSpout kafkaSpout = new KafkaSpout(kafkaConfig);

		TopologyBuilder builder = new TopologyBuilder();

		builder.setSpout("flight-events", kafkaSpout, 1);
		builder.setBolt("flight-stats", new GetFlightStatsBolt(), 1).shuffleGrouping("flight-events");

		Map conf = new HashMap();
		conf.put(backtype.storm.Config.TOPOLOGY_WORKERS, 4);
		conf.put(backtype.storm.Config.TOPOLOGY_DEBUG, true);
		if (args != null && args.length > 0) {
			StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
		}   else {
			LocalCluster cluster = new LocalCluster();
			cluster.submitTopology("flight-topology", conf, builder.createTopology());
		}
	}
 
Example 25
Project: LearnStorm   File: LogAnalyzer.java
private SpoutConfig constructKafkaSpoutConf() {
		final BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
		final String topic = topologyConfig.getProperty("kafka.topic");
		final String zkRoot = topologyConfig.getProperty("kafka.zkRoot");
//		String consumerGroupId = UUID.randomUUID().toString();
		final SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, CONSUMER_GROUP_ID);
		spoutConfig.startOffsetTime = System.currentTimeMillis();
		spoutConfig.scheme = new SchemeAsMultiScheme(logScheme);
		spoutConfig.retryInitialDelayMs = 10000;	// 10 seconds
		spoutConfig.retryDelayMultiplier = 1.1;		// 10, 11, 12.1, 13.31, 14.641... 
		spoutConfig.retryDelayMaxMs = 590000;		// about 10 minutes
		return spoutConfig;
	}
 
Example 26
Project: LearnStorm   File: ApLogAnalyzer.java
private SpoutConfig constructKafkaSpoutConf() {
		final BrokerHosts hosts = new ZkHosts(topologyConfig.getProperty("kafka.zookeeper.host.port"));
		final String topic = topologyConfig.getProperty("kafka.topic");
		final String zkRoot = topologyConfig.getProperty("kafka.zkRoot");
//		String consumerGroupId = UUID.randomUUID().toString();
		final SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, zkRoot, CONSUMER_GROUP_ID);
		spoutConfig.startOffsetTime = System.currentTimeMillis();
		spoutConfig.scheme = new SchemeAsMultiScheme(apLogScheme);
		spoutConfig.retryInitialDelayMs = 10000;	// 10 seconds
		spoutConfig.retryDelayMultiplier = 1.1;		// 10, 11, 12.1, 13.31, 14.641... 
		spoutConfig.retryDelayMaxMs = 590000;		// about 10 minutes
		return spoutConfig;
	}
 
Example 27
Project: LearnStorm   File: TridentKafkaWordCount.java
/**
 * Creates a transactional kafka spout that consumes any new data published to "test" topic.
 * <p/>
 * For more info on transactional spouts
 * see "Transactional spouts" section in
 * <a href="https://storm.apache.org/documentation/Trident-state"> Trident state</a> doc.
 *
 * @return a transactional trident kafka spout.
 */
private TransactionalTridentKafkaSpout createKafkaSpout() {
    ZkHosts hosts = new ZkHosts(zkUrl);
    TridentKafkaConfig config = new TridentKafkaConfig(hosts, KAFKA_TOPIC);
    config.scheme = new SchemeAsMultiScheme(new StringScheme());

    // Consume new data from the topic
    config.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
    return new TransactionalTridentKafkaSpout(config);
}
 
Example 28
Project: realtime-event-processing   File: DocEventProcessingTopology.java
public static StormTopology buildTopology(Config conf, LocalDRPC drpc) {

        TridentTopology topology = new TridentTopology();

        //Kafka Spout
        BrokerHosts zk = new ZkHosts(conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_NAME) + ":" +conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_PORT));
        TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(zk, (String) conf.get(CrawlerConfig.KAFKA_TOPIC_DOCUMENT_NAME));
        kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(kafkaConfig);

        //ElasticSearch Persistent State
        Settings esSettings = ImmutableSettings.settingsBuilder()
                .put("storm.elasticsearch.cluster.name", conf.get(CrawlerConfig.ELASTICSEARCH_CLUSTER_NAME))
                .put("storm.elasticsearch.hosts", conf.get(CrawlerConfig.ELASTICSEARCH_HOST_NAME) + ":" + conf.get(CrawlerConfig.ELASTICSEARCH_HOST_PORT))
                .build();
        StateFactory esStateFactory = new ESIndexState.Factory<JSONObject>(new ClientFactory.NodeClient(esSettings.getAsMap()), JSONObject.class);
        TridentState esStaticState = topology.newStaticState(esStateFactory);

        String esIndex = (String)(conf.get(CrawlerConfig.ELASTICSEARCH_INDEX_NAME));
        topology.newStream("docstream",spout)
                .each( new Fields("str"), new SplitDocStreamArgs(), new Fields("filename", "task", "user", "content"))
                .each( new Fields("filename", "task", "user"), new PrintFilter("Kafka"))
                .each( new Fields("filename","task","user","content"), new PrepareDocForElasticSearch(), new Fields("index","type","id","source") )
                .partitionPersist(esStateFactory, new Fields("index","type","id","source"), new ESIndexUpdater<String>(new ESTridentTupleMapper()), new Fields());

        return topology.build();
    }
 
Example 29
Project: StormSampleProject   File: SentimentAnalysisTopology.java
private static StormTopology createTopology()
{
    SpoutConfig kafkaConf = new SpoutConfig(
        new ZkHosts(Properties.getString("rts.storm.zkhosts")),
        KAFKA_TOPIC,
        "/kafka",
        "KafkaSpout");
    kafkaConf.scheme = new SchemeAsMultiScheme(new StringScheme());
    TopologyBuilder topology = new TopologyBuilder();

    topology.setSpout("kafka_spout", new KafkaSpout(kafkaConf), 4);

    topology.setBolt("twitter_filter", new TwitterFilterBolt(), 4)
            .shuffleGrouping("kafka_spout");

    topology.setBolt("text_filter", new TextFilterBolt(), 4)
            .shuffleGrouping("twitter_filter");

    topology.setBolt("stemming", new StemmingBolt(), 4)
            .shuffleGrouping("text_filter");

    topology.setBolt("positive", new PositiveSentimentBolt(), 4)
            .shuffleGrouping("stemming");
    topology.setBolt("negative", new NegativeSentimentBolt(), 4)
            .shuffleGrouping("stemming");

    topology.setBolt("join", new JoinSentimentsBolt(), 4)
            .fieldsGrouping("positive", new Fields("tweet_id"))
            .fieldsGrouping("negative", new Fields("tweet_id"));

    topology.setBolt("score", new SentimentScoringBolt(), 4)
            .shuffleGrouping("join");

    topology.setBolt("hdfs", new HDFSBolt(), 4)
            .shuffleGrouping("score");
    topology.setBolt("nodejs", new NodeNotifierBolt(), 4)
            .shuffleGrouping("score");

    return topology.createTopology();
}
 
Example 30
Project: storm-smoke-test   File: TridentConnectorUtil.java
public static OpaqueTridentKafkaSpout getTridentKafkaEmitter(String zkConnString, String topicName, Map topologyConfig) {
    BrokerHosts hosts = new ZkHosts(zkConnString);
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(hosts, topicName);
    //topologyConfig.put("topology.spout.max.batch.size", 1);
    //kafkaConfig.forceFromStart = true;
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    return new OpaqueTridentKafkaSpout(kafkaConfig);
}
 
Example 31
Project: partial-key-grouping   File: WordCountPartialKeyGrouping.java
public static void main(String[] args) throws Exception {
    String kafkaZk = "zookeeper:2181"; // change it to your zookeeper server
    BrokerHosts brokerHosts = new ZkHosts(kafkaZk);

    SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, "name_of_kafka_topic", "", "test"); // change it to the name of your kafka topic
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    kafkaConfig.forceFromStart = true;

    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout("stream", new KafkaSpout(kafkaConfig), 1);
    builder.setBolt("split", new SplitterBolt(), 8).shuffleGrouping("stream");
    builder.setBolt("counter", new CounterBolt(), 10).customGrouping("split", new PartialKeyGrouping());
    builder.setBolt("aggregator", new AggregatorBolt(), 1).fieldsGrouping("counter", new Fields("word"));

    Config conf = new Config();
    conf.setDebug(false);
    conf.setMaxSpoutPending(100);
    // conf.setMessageTimeoutSecs(300); // optionally increase the timeout for tuples

    if (args != null && args.length > 0) {
        conf.setNumWorkers(10);
        StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
    } else {
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("test", conf, builder.createTopology());
        Utils.sleep(15000000);
        cluster.killTopology("test");
        cluster.shutdown();
    }
}
 
Example 32
Project: incubator-storm   File: ZkBrokerReader.java
public ZkBrokerReader(Map conf, String topic, ZkHosts hosts) {
    reader = new DynamicBrokersReader(conf, hosts.brokerZkStr, hosts.brokerZkPath, topic);
    cachedBrokers = reader.getBrokerInfo();
    lastRefreshTimeMs = System.currentTimeMillis();
    refreshMillis = hosts.refreshFreqSecs * 1000L;

}
 
Example 33
Project: recsys-online   File: Recsys.java
public static void main(String[] args) throws AlreadyAliveException,
			InvalidTopologyException {
		logger.info("begin to running recsys.");
		BrokerHosts brokerHosts = new ZkHosts(Constants.kafka_zk_address);
		SpoutConfig spoutConfig = new SpoutConfig(brokerHosts, Constants.kafka_topic, Constants.kafka_zk_root, Constants.kafka_id);

		Config conf = new Config();
		Map<String, String> map = new HashMap<String, String>();
		map.put("metadata.broker.list", Constants.kakfa_broker_list);
		map.put("serializer.class", "kafka.serializer.StringEncoder");
		conf.put("kafka.broker.properties", map);
//		conf.put("topic", "topic2");

		spoutConfig.scheme = new SchemeAsMultiScheme(new MessageScheme());
		TopologyBuilder builder = new TopologyBuilder();
		builder.setSpout("spout", new KafkaSpout(spoutConfig));
		builder.setBolt("bolt", new HBaseStoreBolt()).shuffleGrouping("spout");
//		builder.setBolt("kafkabolt", new KafkaBolt<String, Integer>()).shuffleGrouping("bolt");

		if (!islocal) {
			conf.setNumWorkers(3);
			StormSubmitter.submitTopology(Constants.storm_topology_name, conf, builder.createTopology());
		} else {
			LocalCluster cluster = new LocalCluster();
			cluster.submitTopology(Constants.storm_topology_name, conf, builder.createTopology());
			Utils.sleep(100000);
			cluster.killTopology(Constants.storm_topology_name);
			cluster.shutdown();
		}
		logger.info("run recsys finish.");
	}
 
Example 34
Project: storm-benchmark   File: KafkaUtils.java
public static SpoutConfig getSpoutConfig(Map options, MultiScheme scheme) throws IllegalArgumentException {
  String zkServers = (String) Utils.get(options, ZOOKEEPER_SERVERS, "localhost:2181");
  String kafkaRoot = (String) Utils.get(options, KAFKA_ROOT_PATH, "/kafka");
  String connectString = zkServers + kafkaRoot;

  BrokerHosts hosts = new ZkHosts(connectString);
  String topic = (String) Utils.get(options, TOPIC, DEFAULT_TOPIC);
  String zkRoot = kafkaRoot + "/" + "storm-consumer-states";
  String appId = (String) Utils.get(options, CLIENT_ID, "storm-app");

  SpoutConfig config = new SpoutConfig(hosts, topic, zkRoot, appId);
  config.zkServers = new ArrayList<String>();

  String [] servers = zkServers.split(",");

  for (int i = 0; i < servers.length; i++) {
    String[] serverAndPort = servers[i].split(":"); // split each server entry, not just the first
    config.zkServers.add(serverAndPort[0]);
    int port = Integer.parseInt(serverAndPort[1]);
    if (i == 0) {
      config.zkPort = port;
    }

    if (config.zkPort != port) {
      throw new IllegalArgumentException("The ZooKeeper port must be the same on all servers");
    }
  }
  config.scheme = scheme;
  return config;
}
 
Example 35
Project: storm-benchmark   File: KafkaUtils.java
public static TridentKafkaConfig getTridentKafkaConfig(Map options, MultiScheme scheme) {
  String zkServers = (String) Utils.get(options, ZOOKEEPER_SERVERS, "localhost:2181");
  String kafkaRoot = (String) Utils.get(options, KAFKA_ROOT_PATH, "/kafka");
  String connectString = zkServers + kafkaRoot;

  BrokerHosts hosts = new ZkHosts(connectString);
  String topic = (String) Utils.get(options, TOPIC, DEFAULT_TOPIC);
  String appId = (String) Utils.get(options, CLIENT_ID, "storm-app");

  TridentKafkaConfig config = new TridentKafkaConfig(hosts, topic, appId);
  config.scheme = scheme;
  return config;
}
 
Example 36
Project: kappaeg   File: CDRStormTopology.java
private KafkaSpout constructCDRKafkaSpout() {
    BrokerHosts zkhosts = new ZkHosts(globalconfigs.getProperty("cdrstorm.kafkaspout.zkhosts"));
    String topic = globalconfigs.getProperty("cdr.kafkatopic");
    String zkRoot = globalconfigs.getProperty("cdrstorm.kafkaspout.zkroot");
    String consumerGroupId = globalconfigs.getProperty("cdrstorm.kafkaspout.cdr.consumergroupid");
    SpoutConfig spoutConfig = new SpoutConfig(zkhosts, topic, zkRoot, consumerGroupId);
    spoutConfig.scheme = new SchemeAsMultiScheme(new CDRScheme());
    KafkaSpout kafkaspout = new KafkaSpout(spoutConfig);
    return kafkaspout;
}
 
Example 37
Project: kappaeg   File: CDRStormTopology.java
private KafkaSpout constructTwitterKafkaSpout() {
    BrokerHosts zkhosts = new ZkHosts(globalconfigs.getProperty("cdrstorm.kafkaspout.zkhosts"));
    String topic = globalconfigs.getProperty("twitter4j.kafkatopic");
    String zkRoot = globalconfigs.getProperty("cdrstorm.kafkaspout.zkroot");
    String consumerGroupId = globalconfigs.getProperty("cdrstorm.kafkaspout.cdr.consumergroupid");
    SpoutConfig spoutConfig = new SpoutConfig(zkhosts, topic, zkRoot, consumerGroupId);
    
    //Create scheme for Twitter
    spoutConfig.scheme = new SchemeAsMultiScheme(new TwitterScheme());
    
    KafkaSpout kafkaspout = new KafkaSpout(spoutConfig);
    return kafkaspout;
}
 
Example 38
Project: flink-perf   File: KafkaThroughput.java
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, UnknownHostException, InterruptedException {
	final ParameterTool pt = ParameterTool.fromArgs(args);

	TopologyBuilder builder = new TopologyBuilder();
	BrokerHosts hosts = new ZkHosts(pt.getRequired("zookeeper"));
	SpoutConfig spoutConfig = new SpoutConfig(hosts, pt.getRequired("topic"), "/" + pt.getRequired("topic"), UUID.randomUUID().toString());
	spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
	builder.setSpout("source", kafkaSpout, pt.getInt("sourceParallelism"));

	builder.setBolt("sink", new Throughput.Sink(pt), pt.getInt("sinkParallelism")).noneGrouping("source");

	Config conf = new Config();
	conf.setDebug(false);

	if (!pt.has("local")) {
		conf.setNumWorkers(pt.getInt("par", 2));

		StormSubmitter.submitTopologyWithProgressBar("kafka-spout-"+pt.get("name", "no_name"), conf, builder.createTopology());
	} else {
		conf.setMaxTaskParallelism(pt.getInt("par", 2));

		LocalCluster cluster = new LocalCluster();
		cluster.submitTopology("kafka-spout", conf, builder.createTopology());

		Thread.sleep(300000);

		cluster.shutdown();
	}
}
 
Example 39
Project: openbus   File: BrokerSpout.java
public BrokerSpout(String kafkaTopic, String zookeeperHosts, String idClient, boolean forceFromStart) {

	zhost = new ZkHosts(zookeeperHosts);
	config = new TridentKafkaConfig(zhost, kafkaTopic); // the third parameter would be idClient
	config.forceFromStart = forceFromStart;
	if (!forceFromStart) {
	    config.startOffsetTime = -1;
	}
	LOG.info("BrokerSpout. zookeeperHosts: " + zookeeperHosts + " topic: " + kafkaTopic + " idClient: " + idClient);
    }
 
Example 40
Project: openbus   File: BrokerSpout.java
public BrokerSpout(String kafkaTopic, String zookeeperHosts, String idClient, boolean forceFromStart) {
       zhost = new ZkHosts(zookeeperHosts);
       config = new TridentKafkaConfig(zhost, kafkaTopic, idClient);
       config.forceFromStart = forceFromStart;

       LOG.info("BrokerSpout. zookeperHosts: " + zookeeperHosts + " topic: " + kafkaTopic + " idClient: " + idClient);
}
 
Example 41
Project: storm-kafka-0.8-plus   File: ZkBrokerReader.java
public ZkBrokerReader(Map conf, String topic, ZkHosts hosts) {
    reader = new DynamicBrokersReader(conf, hosts.brokerZkStr, hosts.brokerZkPath, topic);
    cachedBrokers = reader.getBrokerInfo();
    lastRefreshTimeMs = System.currentTimeMillis();
    refreshMillis = hosts.refreshFreqSecs * 1000L;

}
 
Example 42
Project: storm-kafka-examples   File: HdfsTopology.java
public static void main(String[] args) {
    try{
        String zkhost = "wxb-1:2181,wxb-2:2181,wxb-3:2181";
        String topic = "order";
        String groupId = "id";
        int spoutNum = 3;
        int boltNum = 1;
        ZkHosts zkHosts = new ZkHosts(zkhost); // the ZooKeeper ensemble that Kafka uses
        SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic, "/order", groupId);  // create /order /id
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

        // HDFS bolt
        // use "|" instead of "," for field delimiter
        RecordFormat format = new DelimitedRecordFormat()
                .withFieldDelimiter("|");

        // sync the filesystem after every 1k tuples
        SyncPolicy syncPolicy = new CountSyncPolicy(1000);

        // rotate files when they reach 5MB
        FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);
        // FileRotationPolicy rotationPolicy = new TimedRotationPolicy(1.0f, TimedRotationPolicy.TimeUnit.MINUTES);

        FileNameFormat fileNameFormat = new DefaultFileNameFormat()
                .withPath("/tmp/").withPrefix("order_").withExtension(".log");

        HdfsBolt hdfsBolt = new HdfsBolt()
                .withFsUrl("hdfs://wxb-1:8020")
                .withFileNameFormat(fileNameFormat)
                .withRecordFormat(format)
                .withRotationPolicy(rotationPolicy)
                .withSyncPolicy(syncPolicy);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", kafkaSpout, spoutNum);
        builder.setBolt("check", new CheckOrderBolt(), boltNum).shuffleGrouping("spout");
        builder.setBolt("counter", new CounterBolt(),boltNum).shuffleGrouping("check");
        builder.setBolt("hdfs", hdfsBolt,boltNum).shuffleGrouping("counter");

        Config config = new Config();
        config.setDebug(true);

        if(args!=null && args.length > 0) {
            config.setNumWorkers(2);
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());
        } else {
            config.setMaxTaskParallelism(2);

            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Wordcount-Topology", config, builder.createTopology());

            Thread.sleep(500000);

            cluster.shutdown();
        }
    }catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 43
Project: streaming_outliers   File: Topology.java
public static TopologyBuilder createTopology( DataPointExtractorConfig extractorConfig
                                            , com.caseystella.analytics.outlier.streaming.OutlierConfig streamingOutlierConfig
                                            , PersistenceConfig persistenceConfig
                                            , String kafkaTopic
                                            , String zkQuorum
                                            , String esNode
                                            , int numWorkers
                                            , int numSpouts
                                            , int numIndexers
                                            , String indexName
                                            , boolean startAtBeginning
                                     )
{
    TopologyBuilder builder = new TopologyBuilder();
    String spoutId = "outlier_filter";
    String boltId= "outlier";
    OutlierKafkaSpout spout = null;
    {
        //BrokerHosts hosts, String topic, String zkRoot, String id
        SpoutConfig spoutConfig = new SpoutConfig(new ZkHosts(zkQuorum)
                                                 , kafkaTopic
                                                 , "/kafka"
                                                 , "streaming_outlier"
                                                 );
        spoutConfig.startOffsetTime = startAtBeginning?kafka.api.OffsetRequest.EarliestTime()
                                                      :kafka.api.OffsetRequest.LatestTime()
                                                      ;
        if(startAtBeginning) {
            spoutConfig.ignoreZkOffsets = true;
        }
        spout = new OutlierKafkaSpout(spoutConfig
                                     , extractorConfig
                                     , streamingOutlierConfig.getGroupingKeys()
                                     , zkQuorum
                                     );
    }
    OutlierBolt bolt = null;
    {
        bolt = new OutlierBolt(kafkaTopic, streamingOutlierConfig, persistenceConfig);
    }
    builder.setSpout(spoutId, spout, numSpouts);
    builder.setBolt(boltId, bolt, numWorkers).fieldsGrouping(spoutId, new Fields(Constants.GROUP_ID));
    {
        Map conf = new HashMap();
        if(esNode != null) {
            conf.put(ConfigurationOptions.ES_NODES, esNode);
        }
        conf.put(ConfigurationOptions.ES_BATCH_SIZE_ENTRIES, "1");
        conf.put(ConfigurationOptions.ES_INDEX_AUTO_CREATE, true);
        builder.setBolt("es_bolt", new EsBolt(indexName, conf), numIndexers)
               .shuffleGrouping(boltId, OutlierBolt.STREAM_ID);
    }
    return builder;
}
 
Example 44
Project: web-crawler   File: WebCrawlerTopology.java
public static StormTopology buildTopology(Config conf, LocalDRPC localDrpc) {
    TridentTopology topology = new TridentTopology();

    //Kafka Spout
    BrokerHosts zk = new ZkHosts(conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_NAME) + ":" +conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_PORT));
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(zk, (String) conf.get(CrawlerConfig.KAFKA_TOPIC_NAME));
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(kafkaConfig);

    //ElasticSearch Persistent State
    Settings esSettings = ImmutableSettings.settingsBuilder()
            .put("storm.elasticsearch.cluster.name", conf.get(CrawlerConfig.ELASTICSEARCH_CLUSTER_NAME))
            .put("storm.elasticsearch.hosts", conf.get(CrawlerConfig.ELASTICSEARCH_HOST_NAME) + ":" + conf.get(CrawlerConfig.ELASTICSEARCH_HOST_PORT))
            .build();
    StateFactory esStateFactory = new ESIndexState.Factory<String>(new ClientFactory.NodeClient(esSettings.getAsMap()), String.class);
    TridentState esStaticState = topology.newStaticState(esStateFactory);

    //Topology
    topology.newStream("crawlKafkaSpout", spout).parallelismHint(5)
             //Splits url and depth information on receiving from Kafka
            .each(new Fields("str"), new SplitKafkaInput(), new Fields("url", "depth"))
            //Bloom Filter. Filters already crawled URLs
            .each(new Fields("url"), new URLFilter())
            //Download and Parse Webpage
            .each(new Fields("url"), new GetAdFreeWebPage(), new Fields("content_html", "title", "href"))//TODO Optimize
            //Add Href URls to Kafka queue
            .each(new Fields("href", "depth"), new KafkaProducerFilter())//TODO Replace with kafka persistent state.
            //Insert to Elasticsearch
            .each(new Fields("url", "content_html", "title"), new PrepareForElasticSearch(), new Fields("index", "type", "id", "source"))
            .partitionPersist(esStateFactory, new Fields("index", "type", "id", "source"), new ESIndexUpdater<String>(new ESTridentTupleMapper()))
    ;

    //DRPC
    topology.newDRPCStream("search", localDrpc)
            .each(new Fields("args"), new SplitDRPCArgs(), new Fields("query_input"))
            .each(new Fields("query_input"), new BingAutoSuggest(0), new Fields("query_preProcessed"))//TODO return List of expanded query
            .each(new Fields("query_preProcessed"), new PrepareSearchQuery(), new Fields("query", "indices", "types"))
            .groupBy(new Fields("query", "indices", "types"))
            .stateQuery(esStaticState, new Fields("query", "indices", "types"), new QuerySearchIndexQuery(), new Fields("results"))
    ;

    return topology.build();
}
 
Example 45
Project: Big-Data-tutorial   File: WeatherTopology.java
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {

		String zkIp = "localhost";

		String nimbusHost = "sandbox.hortonworks.com";

		String zookeeperHost = zkIp +":2181";

		ZkHosts zkHosts = new ZkHosts(zookeeperHost);
		List<String> zkServers = new ArrayList<String>();
		zkServers.add(zkIp);
		SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, "spertus-weather-events", "/spertus-weather-events","test_id");
		kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
		kafkaConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
		kafkaConfig.zkServers = zkServers;
		kafkaConfig.zkRoot = "/spertus-weather-events";
		kafkaConfig.zkPort = 2181;
		kafkaConfig.forceFromStart = true;
		KafkaSpout kafkaSpout = new KafkaSpout(kafkaConfig);

		TopologyBuilder builder = new TopologyBuilder();

		HdfsBolt hdfsBolt = new HdfsBolt().withFsUrl("hdfs://sandbox.hortonworks.com:8020")
				.withFileNameFormat(new DefaultFileNameFormat().withPath("/tmp/test"))
				.withRecordFormat(new DelimitedRecordFormat().withFieldDelimiter("|"))
				.withSyncPolicy(new CountSyncPolicy(10))
				.withRotationPolicy(new FileSizeRotationPolicy(5.0f, Units.MB));
		builder.setSpout("raw-weather-events", kafkaSpout, 1);
		builder.setBolt("filter-airports", new FilterAirportsBolt(), 1).shuffleGrouping("raw-weather-events");
		//        builder.setBolt("test-bolt", new TestBolt(), 1).shuffleGrouping("raw-weather-events");
		//        builder.setBolt("hdfs-bolt", hdfsBolt, 1).shuffleGrouping("raw-weather-events");


		Map conf = new HashMap();
		conf.put(backtype.storm.Config.TOPOLOGY_WORKERS, 4);
		conf.put(backtype.storm.Config.TOPOLOGY_DEBUG, true);
		if (args != null && args.length > 0) {
			StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
		}   else {
			LocalCluster cluster = new LocalCluster();
			cluster.submitTopology("weather-topology", conf, builder.createTopology());
		}
	}
 
Example 46
Project: streaming-benchmarks   File: AdvertisingTopology.java
public static void main(String[] args) throws Exception {
    TopologyBuilder builder = new TopologyBuilder();

    Options opts = new Options();
    opts.addOption("conf", true, "Path to the config file.");

    CommandLineParser parser = new DefaultParser();
    CommandLine cmd = parser.parse(opts, args);
    String configPath = cmd.getOptionValue("conf");
    Map commonConfig = Utils.findAndReadConfigFile(configPath, true);

    String zkServerHosts = joinHosts((List<String>)commonConfig.get("zookeeper.servers"),
                                     Integer.toString((Integer)commonConfig.get("zookeeper.port")));
    String redisServerHost = (String)commonConfig.get("redis.host");
    String kafkaTopic = (String)commonConfig.get("kafka.topic");
    int kafkaPartitions = ((Number)commonConfig.get("kafka.partitions")).intValue();
    int workers = ((Number)commonConfig.get("storm.workers")).intValue();
    int ackers = ((Number)commonConfig.get("storm.ackers")).intValue();
    int cores = ((Number)commonConfig.get("process.cores")).intValue();
    int parallel = Math.max(1, cores/7);

    ZkHosts hosts = new ZkHosts(zkServerHosts);



    SpoutConfig spoutConfig = new SpoutConfig(hosts, kafkaTopic, "/" + kafkaTopic, UUID.randomUUID().toString());
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

    builder.setSpout("ads", kafkaSpout, kafkaPartitions);
    builder.setBolt("event_deserializer", new DeserializeBolt(), parallel).shuffleGrouping("ads");
    builder.setBolt("event_filter", new EventFilterBolt(), parallel).shuffleGrouping("event_deserializer");
    builder.setBolt("event_projection", new EventProjectionBolt(), parallel).shuffleGrouping("event_filter");
    builder.setBolt("redis_join", new RedisJoinBolt(redisServerHost), parallel).shuffleGrouping("event_projection");
    builder.setBolt("campaign_processor", new CampaignProcessor(redisServerHost), parallel*2)
        .fieldsGrouping("redis_join", new Fields("campaign_id"));

    Config conf = new Config();

    if (args != null && args.length > 0) {
        conf.setNumWorkers(workers);
        conf.setNumAckers(ackers);
        StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
    }
    else {

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("test", conf, builder.createTopology());
        backtype.storm.utils.Utils.sleep(10000);
        cluster.killTopology("test");
        cluster.shutdown();
    }
}
 
Example 47
Project: storm-hive-streaming-example   File: Topology.java
public void run(String... args){
    String kafkaTopic = "stock_topic";

    SpoutConfig spoutConfig = new SpoutConfig(new ZkHosts("127.0.0.1"),
            kafkaTopic, "/kafka_storm", "StormSpout");
    spoutConfig.useStartOffsetTimeIfOffsetOutOfRange = true;
    spoutConfig.startOffsetTime = System.currentTimeMillis();

    KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
    
    // Hive connection configuration
    String metaStoreURI = "thrift://one.hdp:9083";
    String dbName = "default";
    String tblName = "stock_prices";
    // Fields for possible partition
    String[] partNames = {"name"};
    // Fields for possible column data
    String[] colNames = {"day", "open", "high", "low", "close", "volume","adj_close"};
    // Record Writer configuration
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
            .withColumnFields(new Fields(colNames))
            .withPartitionFields(new Fields(partNames));

    HiveOptions hiveOptions;
    hiveOptions = new HiveOptions(metaStoreURI, dbName, tblName, mapper)
            .withTxnsPerBatch(2)
            .withBatchSize(100)
            .withIdleTimeout(10)
            .withCallTimeout(10000000);
            //.withKerberosKeytab(path_to_keytab)
            //.withKerberosPrincipal(krb_principal);

    TopologyBuilder builder = new TopologyBuilder();

    builder.setSpout(KAFKA_SPOUT_ID, kafkaSpout);
    builder.setBolt(STOCK_PROCESS_BOLT_ID, new StockDataBolt()).shuffleGrouping(KAFKA_SPOUT_ID);
    builder.setBolt(HIVE_BOLT_ID, new HiveBolt(hiveOptions)).shuffleGrouping(STOCK_PROCESS_BOLT_ID);
    
    String topologyName = "StormHiveStreamingTopo";
    Config config = new Config();
    config.setNumWorkers(1);
    config.setMessageTimeoutSecs(60);
    try {
        StormSubmitter.submitTopology(topologyName, config, builder.createTopology());
    } catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException ex) {
        Logger.getLogger(Topology.class.getName()).log(Level.SEVERE, null, ex);
    }
}
 
Example 48
Project: yuzhouwan   File: ZkTopology.java
public static void main(String[] args) {

        // This is the zookeeper.connect value from the Kafka broker configuration; copy it from there
        String brokerZkStr = "10.100.90.201:2181/kafka_online_sample";
        String brokerZkPath = "/brokers";
        ZkHosts zkHosts = new ZkHosts(brokerZkStr, brokerZkPath);

        String topic = "mars-wap";
        // Below: the ZooKeeper cluster to which consumer offsets are committed
        String offsetZkServers = "10.199.203.169";
        String offsetZkPort = "2181";
        List<String> zkServersList = new ArrayList<>();
        zkServersList.add(offsetZkServers);
        // Root path under which offset information is stored
        String offsetZkRoot = "/stormExample";
        // ID under which this spout's consumer offsets are stored, e.g. named after the topology
        String offsetZkId = "storm-example";

        SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, topic, offsetZkRoot, offsetZkId);
        kafkaConfig.zkPort = Integer.parseInt(offsetZkPort);
        kafkaConfig.zkServers = zkServersList;
        kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

        KafkaSpout spout = new KafkaSpout(kafkaConfig);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", spout, 1);
        builder.setBolt("bolt", new EsBolt("storm/docs"), 1).shuffleGrouping("spout");

        Config config = new Config();
        config.put("es.index.auto.create", "true");

        if (args.length > 0) {
            try {
                StormSubmitter.submitTopology("storm-kafka-example", config, builder.createTopology());
            } catch (Exception e) {
                LOG.error("error: {}", e.getMessage());
            }
        } else {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("test", config, builder.createTopology());
        }
    }
 
Example 49
Project: realtime-event-processing   File: URLEventProcessingTopology.java
public static StormTopology buildTopology(Config conf, LocalDRPC localDrpc) {
    TridentTopology topology = new TridentTopology();

    //Kafka Spout
    BrokerHosts zk = new ZkHosts(conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_NAME) + ":" +conf.get(CrawlerConfig.KAFKA_CONSUMER_HOST_PORT));
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(zk, (String) conf.get(CrawlerConfig.KAFKA_TOPIC_NAME));
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    //kafkaConfig.ignoreZkOffsets=true;
    OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(kafkaConfig);

    //ElasticSearch Persistent State
    Settings esSettings = ImmutableSettings.settingsBuilder()
            .put("storm.elasticsearch.cluster.name", conf.get(CrawlerConfig.ELASTICSEARCH_CLUSTER_NAME))
            .put("storm.elasticsearch.hosts", conf.get(CrawlerConfig.ELASTICSEARCH_HOST_NAME) + ":" + conf.get(CrawlerConfig.ELASTICSEARCH_HOST_PORT))
            .build();
    StateFactory esStateFactory = new ESIndexState.Factory<JSONObject>(new ClientFactory.NodeClient(esSettings.getAsMap()), JSONObject.class);
    TridentState esStaticState = topology.newStaticState(esStateFactory);

    //Topology
    topology.newStream("crawlKafkaSpout", spout).parallelismHint(5)
            //Splits words on receiving from Kafka
            .each(new Fields("str"), new SplitFunction(), new Fields("url", "depth", "task", "user"))
            .each(new Fields("str"), new PrintFilter("Kafka"))
            //Bloom Filter, Filters already crawled URLs
            .each(new Fields("url", "task"), new URLFilter())
            //Download and Parse Webpage
            .each(new Fields("url"), new GetAdFreeWebPage(), new Fields("content_html", "title", "href"))
            //Sending URLs present in the page into the kafka queue.
            .each(new Fields("href", "depth", "task", "user"), new KafkaProducerFilter())
            //Insert to Elasticsearch
            .each(new Fields("url", "content_html", "title", "task", "user"), new PrepareForElasticSearch(), new Fields("index", "type", "id", "source"))
            .partitionPersist(esStateFactory, new Fields("index", "type", "id", "source"), new ESIndexUpdater<String>(new ESTridentTupleMapper()), new Fields())
            ;

    //DRPC
    topology.newDRPCStream("search", localDrpc)
            .each(new Fields("args"), new SplitDRPCArgs(), new Fields("query_input", "task"))
            .each(new Fields("query_input"), new BingAutoSuggest(0), new Fields("query_preProcessed"))
            .each(new Fields("query_preProcessed", "task"), new PrepareSearchQuery(), new Fields("query", "indices", "types"))
            .groupBy(new Fields("query", "indices", "types"))
            .stateQuery(esStaticState, new Fields("query", "indices", "types"), new QuerySearchIndexQuery(), new Fields("results"))
            ;

    return topology.build();
}
 
Example 50
Project: simple-kafka-storm-java   File: OpaqueTridentKafkaSpoutBuilder.java
public OpaqueTridentKafkaSpout build() {
	BrokerHosts zk = new ZkHosts(zookeeper);
	TridentKafkaConfig spoutConf = new TridentKafkaConfig(zk, topic);
	spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
	return new OpaqueTridentKafkaSpout(spoutConf);
}
 
Example 51
Project: simple-kafka-storm-java   File: KafkaSpoutBuilder.java
public KafkaSpout build() {
	BrokerHosts hosts = new ZkHosts(zooKeeper);
	SpoutConfig spoutConfig = new SpoutConfig(hosts, topic, "/" + topic, UUID.randomUUID().toString());
	spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	return new KafkaSpout(spoutConfig);
}
 
Example 52
Project: Kafka-Storm-ElasticSearch   File: AuditActiveLoginsTopology.java
public AuditActiveLoginsTopology(String zookeeperHosts) {
	kafkaBrokerHosts = new ZkHosts(zookeeperHosts);
}
 
Example 53
Project: StormTopology-AuditActiveLogins   File: AuditActiveLoginsTopology.java
public AuditActiveLoginsTopology(String zookeeperHosts) {
	kafkaBrokerHosts = new ZkHosts(zookeeperHosts);
}
 
Example 54
Project: AuditTopology-ES   File: AuditActiveLoginsTopology.java
public AuditActiveLoginsTopology(String zookeeperHosts) {
	kafkaBrokerHosts = new ZkHosts(zookeeperHosts);
}
 
Example 55
Project: storm-kafka-0.8-plus-test   File: SentenceAggregationTopology.java
public SentenceAggregationTopology(String kafkaZookeeper) {
    brokerHosts = new ZkHosts(kafkaZookeeper);
}
 
Example 56
Project: openbus   File: BrokerSpout.java
public BrokerSpout(String kafkaTopic, String zookeeperHosts, String idClient, boolean forceFromStart) {

        zhost = new ZkHosts(zookeeperHosts);
        config = new TridentKafkaConfig(zhost, kafkaTopic); // the third parameter would be idClient
        config.forceFromStart = forceFromStart;
        config.startOffsetTime = -2;

        LOG.info("BrokerSpout. zookeeperHosts: " + zookeeperHosts + " topic: " + kafkaTopic + " idClient: " + idClient);
}