backtype.storm.spout.SchemeAsMultiScheme Java Examples

The following examples show how to use backtype.storm.spout.SchemeAsMultiScheme. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TopologyRunner.java    From opensoc-streaming with Apache License 2.0 6 votes vote down vote up
private boolean initializeKafkaSpout(String name) {
	try {

		BrokerHosts zk = new ZkHosts(config.getString("kafka.zk"));
		String input_topic = config.getString("spout.kafka.topic");
		SpoutConfig kafkaConfig = new SpoutConfig(zk, input_topic, "",
				input_topic);
		kafkaConfig.scheme = new SchemeAsMultiScheme(new RawScheme());
		kafkaConfig.forceFromStart = Boolean.valueOf("True");
		kafkaConfig.startOffsetTime = -1;

		builder.setSpout(name, new KafkaSpout(kafkaConfig),
				config.getInt("spout.kafka.parallelism.hint")).setNumTasks(
				config.getInt("spout.kafka.num.tasks"));

	} catch (Exception e) {
		e.printStackTrace();
		System.exit(0);
	}

	return true;
}
 
Example #2
Source File: UniqueVisitor.java    From storm-benchmark with Apache License 2.0 6 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {

  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int pvBoltNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_PV_BOLT_NUM);
  final int uvBoltNum = BenchmarkUtils.getInt(config, UNIQUER_NUM, DEFAULT_UV_BOLT_NUM);
  final int winLen = BenchmarkUtils.getInt(config, WINDOW_LENGTH, DEFAULT_WINDOW_LENGTH_IN_SEC);
  final int emitFreq = BenchmarkUtils.getInt(config, EMIT_FREQ, DEFAULT_EMIT_FREQ_IN_SEC);
  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(
          config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(VIEW_ID, new PageViewBolt(Item.URL, Item.USER), pvBoltNum)
          .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(UNIQUER_ID, new UniqueVisitorBolt(winLen, emitFreq), uvBoltNum)
          .fieldsGrouping(VIEW_ID, new Fields(Item.URL.toString()));
  return builder.createTopology();
}
 
Example #3
Source File: Grep.java    From storm-benchmark with Apache License 2.0 6 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {

  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int matBoltNum = BenchmarkUtils.getInt(config, FM_NUM, DEFAULT_MAT_BOLT_NUM);
  final int cntBoltNum = BenchmarkUtils.getInt(config, CM_NUM, DEFAULT_CNT_BOLT_NUM);
  final String ptnString = (String) Utils.get(config, PATTERN_STRING, DEFAULT_PATTERN_STR);

  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(FM_ID, new FindMatchingSentence(ptnString), matBoltNum)
          .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(CM_ID, new CountMatchingSentence(), cntBoltNum)
          .fieldsGrouping(FM_ID, new Fields(FindMatchingSentence.FIELDS));

  return builder.createTopology();
}
 
Example #4
Source File: AuditActiveLoginsTopology.java    From Kafka-Storm-ElasticSearch with Apache License 2.0 6 votes vote down vote up
public StormTopology buildTopology(Properties properties) {
	
	// Load properties for the storm topology
	String kafkaTopic = properties.getProperty("kafka.topic");
	
	SpoutConfig kafkaConfig = new SpoutConfig(kafkaBrokerHosts, kafkaTopic, "",	"storm");
	kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	TopologyBuilder builder = new TopologyBuilder();

	// Specific audit logs analysis bolts
	AuditLoginsCounterBolt loginCounterbolt = new AuditLoginsCounterBolt();
	AuditParserBolt auditParserBolt = new AuditParserBolt();
	
	// Elastic search bolt
	TupleMapper tupleMapper = new DefaultTupleMapper();
	ElasticSearchBolt elasticSearchBolt = new ElasticSearchBolt(tupleMapper);

	// Topology scheme: KafkaSpout -> auditParserBolt -> loginCounterBolt -> elasticSearchBolt
	builder.setSpout("KafkaSpout", new KafkaSpout(kafkaConfig), 1);
	builder.setBolt("ParseBolt", auditParserBolt, 1).shuffleGrouping("KafkaSpout");
	builder.setBolt("CountBolt", loginCounterbolt, 1).shuffleGrouping("ParseBolt");
	builder.setBolt("ElasticSearchBolt", elasticSearchBolt, 1)
	.fieldsGrouping("CountBolt", new Fields("id", "index", "type", "document"));

	return builder.createTopology();
}
 
Example #5
Source File: TridentWordCount.java    From storm-benchmark with Apache License 2.0 6 votes vote down vote up
@Override
  public StormTopology getTopology(Config config) {
    final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int splitNum = BenchmarkUtils.getInt(config, SPLIT_NUM, DEFAULT_SPLIT_BOLT_NUM);
    final int countNum = BenchmarkUtils.getInt(config, COUNT_NUM, DEFAULT_COUNT_BOLT_NUM);

    spout  = new TransactionalTridentKafkaSpout(
            KafkaUtils.getTridentKafkaConfig(config, new SchemeAsMultiScheme(new StringScheme())));

    TridentTopology trident = new TridentTopology();

    trident.newStream("wordcount", spout).name("sentence").parallelismHint(spoutNum).shuffle()
            .each(new Fields(StringScheme.STRING_SCHEME_KEY), new WordSplit(), new Fields("word"))
            .parallelismHint(splitNum)
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(countNum);
/*    trident.newStream("wordcount", spout)
      .each(new Fields(StringScheme.STRING_SCHEME_KEY), new WordSplit(), new Fields("word"))
      .groupBy(new Fields("word"))
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));*/


    return trident.build();
  }
 
Example #6
Source File: PageViewCount.java    From storm-benchmark with Apache License 2.0 6 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {

  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int viewBoltNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_VIEW_BOLT_NUM);
  final int cntBoltNum = BenchmarkUtils.getInt(config, COUNT_NUM, DEFAULT_COUNT_BOLT_NUM);

  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(
          config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(VIEW_ID, new PageViewBolt(Item.URL, Item.ONE), viewBoltNum)
         .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(COUNT_ID, new WordCount.Count(), cntBoltNum)
          .fieldsGrouping(VIEW_ID, new Fields(Item.URL.toString()));
  return builder.createTopology();
}
 
Example #7
Source File: DataClean.java    From storm-benchmark with Apache License 2.0 6 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {
  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int pvBoltNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_PV_BOLT_NUM);
  final int filterBoltNum = BenchmarkUtils.getInt(config, FILTER_NUM, DEFAULT_FILTER_BOLT_NUM);
  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(
          config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(VIEW_ID, new PageViewBolt(Item.STATUS, Item.ALL), pvBoltNum)
          .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(FILTER_ID, new FilterBolt<Integer>(404), filterBoltNum)
          .fieldsGrouping(VIEW_ID, new Fields(Item.STATUS.toString()));
  return builder.createTopology();
}
 
Example #8
Source File: CounterTopology.java    From storm-kafka-examples with Apache License 2.0 5 votes vote down vote up
/**
 * @param args
 * http://www.programcreek.com/java-api-examples/index.php?api=storm.kafka.KafkaSpout
 */
public static void main(String[] args) {
	try{
		//设置喷发节点并分配并发数,该并发数将会控制该对象在集群中的线程数(6个)
		String zkhost = "wxb-1:2181,wxb-2:2181,wxb-3:2181";
		String topic = "order";
		String groupId = "id";
		int spoutNum = 3;
		int boltNum = 1;
		ZkHosts zkHosts = new ZkHosts(zkhost);//kafaka所在的zookeeper
		SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic, "/order", groupId);  // create /order /id
		spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
		KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", kafkaSpout, spoutNum);
		builder.setBolt("check", new CheckOrderBolt(), boltNum).shuffleGrouping("spout");
        builder.setBolt("counter", new CounterBolt(),boltNum).shuffleGrouping("check");

        Config config = new Config();
        config.setDebug(true);
        
        if(args!=null && args.length > 0) {
            config.setNumWorkers(2);
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());
        } else {        
            config.setMaxTaskParallelism(2);

            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Wordcount-Topology", config, builder.createTopology());

            Thread.sleep(500000);

            cluster.shutdown();
        }
	}catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example #9
Source File: KafkaUtilsTest.java    From storm-kafka-0.8-plus with Apache License 2.0 5 votes vote down vote up
@Test
public void generateTuplesWithValueSchemeAndKeyValueMessage() {
    config.scheme = new SchemeAsMultiScheme(new StringScheme());
    String value = "value";
    String key = "key";
    createTopicAndSendMessage(key, value);
    ByteBufferMessageSet messageAndOffsets = getLastMessage();
    for (MessageAndOffset msg : messageAndOffsets) {
        Iterable<List<Object>> lists = KafkaUtils.generateTuples(config, msg.message());
        assertEquals(value, lists.iterator().next().get(0));
    }
}
 
Example #10
Source File: KafkaThroughput.java    From flink-perf with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, UnknownHostException, InterruptedException {
	final ParameterTool pt = ParameterTool.fromArgs(args);

	TopologyBuilder builder = new TopologyBuilder();
	BrokerHosts hosts = new ZkHosts(pt.getRequired("zookeeper"));
	SpoutConfig spoutConfig = new SpoutConfig(hosts, pt.getRequired("topic"), "/" + pt.getRequired("topic"), UUID.randomUUID().toString());
	spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
	builder.setSpout("source", kafkaSpout, pt.getInt("sourceParallelism"));

	builder.setBolt("sink", new Throughput.Sink(pt), pt.getInt("sinkParallelism")).noneGrouping("source");

	Config conf = new Config();
	conf.setDebug(false);

	if (!pt.has("local")) {
		conf.setNumWorkers(pt.getInt("par", 2));

		StormSubmitter.submitTopologyWithProgressBar("kafka-spout-"+pt.get("name", "no_name"), conf, builder.createTopology());
	} else {
		conf.setMaxTaskParallelism(pt.getInt("par", 2));

		LocalCluster cluster = new LocalCluster();
		cluster.submitTopology("kafka-spout", conf, builder.createTopology());

		Thread.sleep(300000);

		cluster.shutdown();
	}
}
 
Example #11
Source File: CorrelationSpout.java    From eagle with Apache License 2.0 5 votes vote down vote up
private MultiScheme createMultiScheme(Map conf, String topic, String schemeClsName) throws Exception {
    Object scheme = SchemeBuilder.buildFromClsName(schemeClsName, topic, conf);
    if (scheme instanceof MultiScheme) {
        return (MultiScheme) scheme;
    } else if (scheme instanceof Scheme) {
        return new SchemeAsMultiScheme((Scheme) scheme);
    } else {
        LOG.error("create spout scheme failed.");
        throw new IllegalArgumentException("create spout scheme failed.");
    }
}
 
Example #12
Source File: DRPC.java    From storm-benchmark with Apache License 2.0 4 votes vote down vote up
@Override
  public StormTopology getTopology(Config config) {

    Object sObj = config.get(SERVER);
    if (null == sObj) {
      throw new IllegalArgumentException("must set a drpc server");
    }
    server = (String) sObj;
    config.put(Config.DRPC_SERVERS, Lists.newArrayList(server));

    Object pObj = config.get(PORT);
    if (null == pObj) {
      throw new IllegalArgumentException("must set a drpc port");
    }
    port = Utils.getInt(pObj);
    config.put(Config.DRPC_PORT, port);

    LOG.info("drpc server: " + server + "; drpc port: " + port);

    final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int pageNum = BenchmarkUtils.getInt(config, PAGE_NUM, DEFAULT_PAGE_BOLT_NUM);
    final int viewNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_VIEW_BOLT_NUM);
    final int userNum = BenchmarkUtils.getInt(config, USER_NUM, DEFAULT_USER_BOLT_NUM);
    final int followerNum = BenchmarkUtils.getInt(config, FOLLOWER_NUM, DEFAULT_FOLLOWER_BOLT_NUM);

    spout = new TransactionalTridentKafkaSpout(
            KafkaUtils.getTridentKafkaConfig(config, new SchemeAsMultiScheme(new StringScheme())));

    TridentTopology trident = new TridentTopology();
    TridentState urlToUsers =
            trident.newStream("drpc", spout).parallelismHint(spoutNum).shuffle()
            .each(new Fields(StringScheme.STRING_SCHEME_KEY), new Extract(Arrays.asList(Item.URL, Item.USER)),
                    new Fields("url", "user")).parallelismHint(pageNum)
            .groupBy(new Fields("url"))
            .persistentAggregate(new MemoryMapState.Factory(), new Fields("url", "user"), new Distinct(), new Fields("user_set"))
            .parallelismHint(viewNum);
/** debug
 *  1. this proves that the aggregated result has successfully persisted
    urlToUsers.newValuesStream()
            .each(new Fields("url", "user_set"), new Print("(url, user_set)"), new Fields("url2", "user_set2"));
 */
    PageViewGenerator generator = new PageViewGenerator();
    TridentState userToFollowers = trident.newStaticState(new StaticSingleKeyMapState.Factory(generator.genFollowersDB()));
/** debug
  * 2. this proves that MemoryMapState could be read correctly
   trident.newStream("urlToUsers", new PageViewSpout(false))
            .each(new Fields("page_view"), new Extract(Arrays.asList(Item.URL)), new Fields("url"))
            .each(new Fields("url"), new Print("url"), new Fields("url2"))
            .groupBy(new Fields("url2"))
            .stateQuery(urlToUsers, new Fields("url2"),  new MapGet(), new Fields("users"))
            .each(new Fields("users"), new Print("users"), new Fields("users2"));
*/
/** debug
 *  3. this proves that StaticSingleKeyMapState could be read correctly
    trident.newStream("userToFollowers", new PageViewSpout(false))
            .each(new Fields("page_view"), new Extract(Arrays.asList(Item.USER)), new Fields("user"))
            .each(new Fields("user"), new Print("user"), new Fields("user2"))
            .stateQuery(userToFollowers, new Fields("user2"), new MapGet(), new Fields("followers"))
            .each(new Fields("followers"), new Print("followers"), new Fields("followers2"));
 */
    trident.newDRPCStream(FUNCTION, null)
            .each(new Fields("args"), new Print("args"), new Fields("url"))
            .groupBy(new Fields("url"))
            .stateQuery(urlToUsers, new Fields("url"), new MapGet(), new Fields("users"))
            .each(new Fields("users"), new Expand(), new Fields("user")).parallelismHint(userNum)
            .groupBy(new Fields("user"))
            .stateQuery(userToFollowers, new Fields("user"), new MapGet(), new Fields("followers"))
            .each(new Fields("followers"), new Expand(), new Fields("follower")).parallelismHint(followerNum)
            .groupBy(new Fields("follower"))
            .aggregate(new One(), new Fields("one"))
            .aggregate(new Fields("one"), new Sum(), new Fields("reach"));
    return trident.build();
  }
 
Example #13
Source File: ZkTopology.java    From yuzhouwan with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) {

        //这个地方其实就是kafka配置文件里边的zookeeper.connect这个参数,可以去那里拿过来
        String brokerZkStr = "10.100.90.201:2181/kafka_online_sample";
        String brokerZkPath = "/brokers";
        ZkHosts zkHosts = new ZkHosts(brokerZkStr, brokerZkPath);

        String topic = "mars-wap";
        //以下:将offset汇报到哪个zk集群,相应配置
        String offsetZkServers = "10.199.203.169";
        String offsetZkPort = "2181";
        List<String> zkServersList = new ArrayList<>();
        zkServersList.add(offsetZkServers);
        //汇报offset信息的root路径
        String offsetZkRoot = "/stormExample";
        //存储该spout id的消费offset信息,譬如以topoName来命名
        String offsetZkId = "storm-example";

        SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, topic, offsetZkRoot, offsetZkId);
        kafkaConfig.zkPort = Integer.parseInt(offsetZkPort);
        kafkaConfig.zkServers = zkServersList;
        kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

        KafkaSpout spout = new KafkaSpout(kafkaConfig);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", spout, 1);
        builder.setBolt("bolt", new EsBolt("storm/docs"), 1).shuffleGrouping("spout");

        Config config = new Config();
        config.put("es.index.auto.create", "true");

        if (args.length > 0) {
            try {
                StormSubmitter.submitTopology("storm-kafka-example", config, builder.createTopology());
            } catch (Exception e) {
                LOG.error("", e);
            }
        } else {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("test", config, builder.createTopology());
        }
    }
 
Example #14
Source File: KafkaWordCount.java    From storm-benchmark with Apache License 2.0 4 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {
  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(config, new SchemeAsMultiScheme(new StringScheme())));
  return super.getTopology(config);
}
 
Example #15
Source File: KafkaStreamSource.java    From eagle with Apache License 2.0 4 votes vote down vote up
private static KafkaSpout createKafkaSpout(KafkaStreamSourceConfig config) {

        // the following is for fetching data from one topic
        // Kafka topic
        String topic = config.getTopicId();
        // Kafka broker zk connection
        String zkConnString = config.getBrokerZkQuorum();
        // Kafka fetch size
        int fetchSize = config.getFetchSize();
        LOG.info(String.format("Use topic : %s, zkQuorum : %s , fetchSize : %d", topic, zkConnString, fetchSize));

        /*
         the following is for recording offset for processing the data
         the zk path to store current offset is comprised of the following
         offset zkPath = zkRoot + "/" + topic + "/" + consumerGroupId + "/" + partition_Id

         consumerGroupId is for differentiating different consumers which consume the same topic
        */
        // transaction zkRoot
        String zkRoot = config.getTransactionZKRoot();
        // Kafka consumer group id
        String groupId = config.getConsumerGroupId();
        String brokerZkPath = config.getBrokerZkPath();

        BrokerHosts hosts;
        if (StringUtils.isNotBlank(brokerZkPath)) {
            hosts = new ZkHosts(zkConnString);
        } else {
            hosts = new ZkHosts(zkConnString, brokerZkPath);
        }

        SpoutConfig spoutConfig = new SpoutConfig(hosts,
            topic,
            zkRoot + "/" + topic,
            groupId);

        // transaction zkServers to store kafka consumer offset. Default to use storm zookeeper
        if (StringUtils.isNotBlank(config.getTransactionZkServers())) {
            String[] txZkServers = config.getTransactionZkServers().split(",");
            spoutConfig.zkServers = Arrays.stream(txZkServers).map(server -> server.split(":")[0]).collect(Collectors.toList());
            spoutConfig.zkPort = Integer.parseInt(txZkServers[0].split(":")[1]);
            LOG.info("txZkServers:" + spoutConfig.zkServers + ", zkPort:" + spoutConfig.zkPort);
        }

        // transaction update interval
        spoutConfig.stateUpdateIntervalMs = config.getTransactionStateUpdateMS();
        // Kafka fetch size
        spoutConfig.fetchSizeBytes = fetchSize;
        spoutConfig.startOffsetTime = kafka.api.OffsetRequest.LatestTime();

        // "startOffsetTime" is for test usage, prod should not use this
        if (config.getStartOffsetTime() >= 0) {
            spoutConfig.startOffsetTime = config.getStartOffsetTime();
        }
        // "forceFromStart" is for test usage, prod should not use this
        if (config.isForceFromStart()) {
            spoutConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
        }

        Preconditions.checkNotNull(config.getSchemaClass(), "schemaClass is null");
        try {
            Scheme s = config.getSchemaClass().newInstance();
            spoutConfig.scheme = new SchemeAsMultiScheme(s);
        } catch (Exception ex) {
            LOG.error("Error instantiating scheme object");
            throw new IllegalStateException(ex);
        }
        return new KafkaSpout(spoutConfig);
    }
 
Example #16
Source File: KafkaSourcedSpoutProvider.java    From Eagle with Apache License 2.0 4 votes vote down vote up
public SchemeAsMultiScheme getStreamScheme(String deserClsName, Config context) {
	return new SchemeAsMultiScheme(new KafkaSourcedSpoutScheme(deserClsName, context));
}
 
Example #17
Source File: TestUtils.java    From trident-tutorial with Apache License 2.0 4 votes vote down vote up
public static TransactionalTridentKafkaSpout testTweetSpout(BrokerHosts hosts) {
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(hosts, "test", "storm");
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    return new TransactionalTridentKafkaSpout(kafkaConfig);
}
 
Example #18
Source File: KafkaUtilsTest.java    From storm-kafka-0.8-plus with Apache License 2.0 4 votes vote down vote up
@Test
public void generateTupelsWithValueScheme() {
    config.scheme = new SchemeAsMultiScheme(new StringScheme());
    runGetValueOnlyTuplesTest();
}
 
Example #19
Source File: HdfsTopology.java    From storm-kafka-examples with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) {
    try{
        String zkhost = "wxb-1:2181,wxb-2:2181,wxb-3:2181";
        String topic = "order";
        String groupId = "id";
        int spoutNum = 3;
        int boltNum = 1;
        ZkHosts zkHosts = new ZkHosts(zkhost);//kafaka所在的zookeeper
        SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic, "/order", groupId);  // create /order /id
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

        // HDFS bolt
        // use "|" instead of "," for field delimiter
        RecordFormat format = new DelimitedRecordFormat()
                .withFieldDelimiter("|");

        // sync the filesystem after every 1k tuples
        SyncPolicy syncPolicy = new CountSyncPolicy(1000);

        // rotate files when they reach 5MB
        FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);
        // FileRotationPolicy rotationPolicy = new TimedRotationPolicy(1.0f, TimedRotationPolicy.TimeUnit.MINUTES);

        FileNameFormat fileNameFormat = new DefaultFileNameFormat()
                .withPath("/tmp/").withPrefix("order_").withExtension(".log");

        HdfsBolt hdfsBolt = new HdfsBolt()
                .withFsUrl("hdfs://wxb-1:8020")
                .withFileNameFormat(fileNameFormat)
                .withRecordFormat(format)
                .withRotationPolicy(rotationPolicy)
                .withSyncPolicy(syncPolicy);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", kafkaSpout, spoutNum);
        builder.setBolt("check", new CheckOrderBolt(), boltNum).shuffleGrouping("spout");
        builder.setBolt("counter", new CounterBolt(),boltNum).shuffleGrouping("check");
        builder.setBolt("hdfs", hdfsBolt,boltNum).shuffleGrouping("counter");

        Config config = new Config();
        config.setDebug(true);

        if(args!=null && args.length > 0) {
            config.setNumWorkers(2);
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());
        } else {
            config.setMaxTaskParallelism(2);

            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Wordcount-Topology", config, builder.createTopology());

            Thread.sleep(500000);

            cluster.shutdown();
        }
    }catch (Exception e) {
        e.printStackTrace();
    }
}