storm.kafka.StringScheme Java Examples

The following examples show how to use storm.kafka.StringScheme. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HBaseAuditLogApplication.java    From eagle with Apache License 2.0 6 votes vote down vote up
@Override
public StormTopology execute(Config config, StormEnvironment environment) {
    TopologyBuilder builder = new TopologyBuilder();
    KafkaSpoutProvider provider = new KafkaSpoutProvider();
    IRichSpout spout = provider.getSpout(config);

    HBaseAuditLogParserBolt bolt = new HBaseAuditLogParserBolt();

    int numOfSpoutTasks = config.getInt(SPOUT_TASK_NUM);
    int numOfParserTasks = config.getInt(PARSER_TASK_NUM);
    int numOfJoinTasks = config.getInt(JOIN_TASK_NUM);
    int numOfSinkTasks = config.getInt(SINK_TASK_NUM);

    builder.setSpout("ingest", spout, numOfSpoutTasks);
    BoltDeclarer boltDeclarer = builder.setBolt("parserBolt", bolt, numOfParserTasks);
    boltDeclarer.fieldsGrouping("ingest", new Fields(StringScheme.STRING_SCHEME_KEY));

    HBaseResourceSensitivityDataJoinBolt joinBolt = new HBaseResourceSensitivityDataJoinBolt(config);
    BoltDeclarer joinBoltDeclarer = builder.setBolt("joinBolt", joinBolt, numOfJoinTasks);
    joinBoltDeclarer.fieldsGrouping("parserBolt", new Fields("f1"));

    StormStreamSink sinkBolt = environment.getStreamSink("hbase_audit_log_stream",config);
    BoltDeclarer kafkaBoltDeclarer = builder.setBolt("kafkaSink", sinkBolt, numOfSinkTasks);
    kafkaBoltDeclarer.fieldsGrouping("joinBolt", new Fields("user"));
    return builder.createTopology();
}
 
Example #2
Source File: OozieAuditLogApplication.java    From eagle with Apache License 2.0 6 votes vote down vote up
@Override
public StormTopology execute(Config config, StormEnvironment environment) {
    TopologyBuilder builder = new TopologyBuilder();
    KafkaSpoutProvider provider = new KafkaSpoutProvider();
    IRichSpout spout = provider.getSpout(config);

    int numOfSpoutTasks = config.getInt(SPOUT_TASK_NUM);
    int numOfParserTask = config.getInt(PARSER_TASK_NUM);
    int numOfJoinTasks = config.getInt(JOIN_TASK_NUM);

    builder.setSpout("ingest", spout, numOfSpoutTasks);

    OozieAuditLogParserBolt parserBolt = new OozieAuditLogParserBolt();
    BoltDeclarer parserBoltDeclarer = builder.setBolt("parserBolt", parserBolt, numOfParserTask);
    parserBoltDeclarer.fieldsGrouping("ingest", new Fields(StringScheme.STRING_SCHEME_KEY));

    OozieResourceSensitivityDataJoinBolt joinBolt = new OozieResourceSensitivityDataJoinBolt(config);
    BoltDeclarer boltDeclarer = builder.setBolt("joinBolt", joinBolt, numOfJoinTasks);
    boltDeclarer.fieldsGrouping("parserBolt", new Fields("f1"));

    StormStreamSink sinkBolt = environment.getStreamSink("oozie_audit_log_stream", config);
    int numOfSinkTasks = config.getInt(SINK_TASK_NUM);
    BoltDeclarer kafkaBoltDeclarer = builder.setBolt("kafkaSink", sinkBolt, numOfSinkTasks);
    kafkaBoltDeclarer.fieldsGrouping("joinBolt", new Fields("user"));
    return builder.createTopology();
}
 
Example #3
Source File: UniqueVisitor.java    From storm-benchmark with Apache License 2.0 6 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {

  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int pvBoltNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_PV_BOLT_NUM);
  final int uvBoltNum = BenchmarkUtils.getInt(config, UNIQUER_NUM, DEFAULT_UV_BOLT_NUM);
  final int winLen = BenchmarkUtils.getInt(config, WINDOW_LENGTH, DEFAULT_WINDOW_LENGTH_IN_SEC);
  final int emitFreq = BenchmarkUtils.getInt(config, EMIT_FREQ, DEFAULT_EMIT_FREQ_IN_SEC);
  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(
          config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(VIEW_ID, new PageViewBolt(Item.URL, Item.USER), pvBoltNum)
          .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(UNIQUER_ID, new UniqueVisitorBolt(winLen, emitFreq), uvBoltNum)
          .fieldsGrouping(VIEW_ID, new Fields(Item.URL.toString()));
  return builder.createTopology();
}
 
Example #4
Source File: GCLogApplication.java    From eagle with Apache License 2.0 6 votes vote down vote up
@Override
public StormTopology execute(Config config, StormEnvironment environment) {
    TopologyBuilder builder = new TopologyBuilder();
    KafkaSpoutProvider provider = new KafkaSpoutProvider();
    IRichSpout spout = provider.getSpout(config);

    int numOfSpoutTasks = config.getInt(SPOUT_TASK_NUM);
    int numOfAnalyzerTasks = config.getInt(ANALYZER_TASK_NUM);
    int numOfGeneratorTasks = config.getInt(GENERATOR_TASK_NUM);
    int numOfSinkTasks = config.getInt(SINK_TASK_NUM);

    builder.setSpout("ingest", spout, numOfSpoutTasks);

    GCLogAnalyzerBolt bolt = new GCLogAnalyzerBolt();
    BoltDeclarer boltDeclarer = builder.setBolt("analyzerBolt", bolt, numOfAnalyzerTasks);
    boltDeclarer.fieldsGrouping("ingest", new Fields(StringScheme.STRING_SCHEME_KEY));

    GCMetricGeneratorBolt generatorBolt = new GCMetricGeneratorBolt(config);
    BoltDeclarer joinBoltDeclarer = builder.setBolt("generatorBolt", generatorBolt, numOfGeneratorTasks);
    joinBoltDeclarer.fieldsGrouping("analyzerBolt", new Fields("f1"));

    StormStreamSink sinkBolt = environment.getStreamSink("gc_log_stream",config);
    BoltDeclarer kafkaBoltDeclarer = builder.setBolt("kafkaSink", sinkBolt, numOfSinkTasks);
    kafkaBoltDeclarer.fieldsGrouping("generatorBolt", new Fields("f1"));
    return builder.createTopology();
}
 
Example #5
Source File: Grep.java    From storm-benchmark with Apache License 2.0 6 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {

  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int matBoltNum = BenchmarkUtils.getInt(config, FM_NUM, DEFAULT_MAT_BOLT_NUM);
  final int cntBoltNum = BenchmarkUtils.getInt(config, CM_NUM, DEFAULT_CNT_BOLT_NUM);
  final String ptnString = (String) Utils.get(config, PATTERN_STRING, DEFAULT_PATTERN_STR);

  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(FM_ID, new FindMatchingSentence(ptnString), matBoltNum)
          .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(CM_ID, new CountMatchingSentence(), cntBoltNum)
          .fieldsGrouping(FM_ID, new Fields(FindMatchingSentence.FIELDS));

  return builder.createTopology();
}
 
Example #6
Source File: AuditActiveLoginsTopology.java    From Kafka-Storm-ElasticSearch with Apache License 2.0 6 votes vote down vote up
public StormTopology buildTopology(Properties properties) {
	
	// Load properties for the storm topology
	String kafkaTopic = properties.getProperty("kafka.topic");
	
	SpoutConfig kafkaConfig = new SpoutConfig(kafkaBrokerHosts, kafkaTopic, "",	"storm");
	kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	TopologyBuilder builder = new TopologyBuilder();

	// Specific audit logs analysis bolts
	AuditLoginsCounterBolt loginCounterbolt = new AuditLoginsCounterBolt();
	AuditParserBolt auditParserBolt = new AuditParserBolt();
	
	// Elastic search bolt
	TupleMapper tupleMapper = new DefaultTupleMapper();
	ElasticSearchBolt elasticSearchBolt = new ElasticSearchBolt(tupleMapper);

	// Topology scheme: KafkaSpout -> auditParserBolt -> loginCounterBolt -> elasticSearchBolt
	builder.setSpout("KafkaSpout", new KafkaSpout(kafkaConfig), 1);
	builder.setBolt("ParseBolt", auditParserBolt, 1).shuffleGrouping("KafkaSpout");
	builder.setBolt("CountBolt", loginCounterbolt, 1).shuffleGrouping("ParseBolt");
	builder.setBolt("ElasticSearchBolt", elasticSearchBolt, 1)
	.fieldsGrouping("CountBolt", new Fields("id", "index", "type", "document"));

	return builder.createTopology();
}
 
Example #7
Source File: TridentWordCount.java    From storm-benchmark with Apache License 2.0 6 votes vote down vote up
@Override
  public StormTopology getTopology(Config config) {
    final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int splitNum = BenchmarkUtils.getInt(config, SPLIT_NUM, DEFAULT_SPLIT_BOLT_NUM);
    final int countNum = BenchmarkUtils.getInt(config, COUNT_NUM, DEFAULT_COUNT_BOLT_NUM);

    spout  = new TransactionalTridentKafkaSpout(
            KafkaUtils.getTridentKafkaConfig(config, new SchemeAsMultiScheme(new StringScheme())));

    TridentTopology trident = new TridentTopology();

    trident.newStream("wordcount", spout).name("sentence").parallelismHint(spoutNum).shuffle()
            .each(new Fields(StringScheme.STRING_SCHEME_KEY), new WordSplit(), new Fields("word"))
            .parallelismHint(splitNum)
            .groupBy(new Fields("word"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
            .parallelismHint(countNum);
/*    trident.newStream("wordcount", spout)
      .each(new Fields(StringScheme.STRING_SCHEME_KEY), new WordSplit(), new Fields("word"))
      .groupBy(new Fields("word"))
      .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));*/


    return trident.build();
  }
 
Example #8
Source File: PageViewCount.java    From storm-benchmark with Apache License 2.0 6 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {

  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int viewBoltNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_VIEW_BOLT_NUM);
  final int cntBoltNum = BenchmarkUtils.getInt(config, COUNT_NUM, DEFAULT_COUNT_BOLT_NUM);

  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(
          config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(VIEW_ID, new PageViewBolt(Item.URL, Item.ONE), viewBoltNum)
         .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(COUNT_ID, new WordCount.Count(), cntBoltNum)
          .fieldsGrouping(VIEW_ID, new Fields(Item.URL.toString()));
  return builder.createTopology();
}
 
Example #9
Source File: DataClean.java    From storm-benchmark with Apache License 2.0 6 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {
  final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
  final int pvBoltNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_PV_BOLT_NUM);
  final int filterBoltNum = BenchmarkUtils.getInt(config, FILTER_NUM, DEFAULT_FILTER_BOLT_NUM);
  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(
          config, new SchemeAsMultiScheme(new StringScheme())));

  TopologyBuilder builder = new TopologyBuilder();
  builder.setSpout(SPOUT_ID, spout, spoutNum);
  builder.setBolt(VIEW_ID, new PageViewBolt(Item.STATUS, Item.ALL), pvBoltNum)
          .localOrShuffleGrouping(SPOUT_ID);
  builder.setBolt(FILTER_ID, new FilterBolt<Integer>(404), filterBoltNum)
          .fieldsGrouping(VIEW_ID, new Fields(Item.STATUS.toString()));
  return builder.createTopology();
}
 
Example #10
Source File: CounterTopology.java    From storm-kafka-examples with Apache License 2.0 5 votes vote down vote up
/**
 * @param args
 * http://www.programcreek.com/java-api-examples/index.php?api=storm.kafka.KafkaSpout
 */
public static void main(String[] args) {
	try{
		//设置喷发节点并分配并发数,该并发数将会控制该对象在集群中的线程数(6个)
		String zkhost = "wxb-1:2181,wxb-2:2181,wxb-3:2181";
		String topic = "order";
		String groupId = "id";
		int spoutNum = 3;
		int boltNum = 1;
		ZkHosts zkHosts = new ZkHosts(zkhost);//kafaka所在的zookeeper
		SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic, "/order", groupId);  // create /order /id
		spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
		KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", kafkaSpout, spoutNum);
		builder.setBolt("check", new CheckOrderBolt(), boltNum).shuffleGrouping("spout");
        builder.setBolt("counter", new CounterBolt(),boltNum).shuffleGrouping("check");

        Config config = new Config();
        config.setDebug(true);
        
        if(args!=null && args.length > 0) {
            config.setNumWorkers(2);
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());
        } else {        
            config.setMaxTaskParallelism(2);

            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Wordcount-Topology", config, builder.createTopology());

            Thread.sleep(500000);

            cluster.shutdown();
        }
	}catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example #11
Source File: KafkaThroughput.java    From flink-perf with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, UnknownHostException, InterruptedException {
	final ParameterTool pt = ParameterTool.fromArgs(args);

	TopologyBuilder builder = new TopologyBuilder();
	BrokerHosts hosts = new ZkHosts(pt.getRequired("zookeeper"));
	SpoutConfig spoutConfig = new SpoutConfig(hosts, pt.getRequired("topic"), "/" + pt.getRequired("topic"), UUID.randomUUID().toString());
	spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
	builder.setSpout("source", kafkaSpout, pt.getInt("sourceParallelism"));

	builder.setBolt("sink", new Throughput.Sink(pt), pt.getInt("sinkParallelism")).noneGrouping("source");

	Config conf = new Config();
	conf.setDebug(false);

	if (!pt.has("local")) {
		conf.setNumWorkers(pt.getInt("par", 2));

		StormSubmitter.submitTopologyWithProgressBar("kafka-spout-"+pt.get("name", "no_name"), conf, builder.createTopology());
	} else {
		conf.setMaxTaskParallelism(pt.getInt("par", 2));

		LocalCluster cluster = new LocalCluster();
		cluster.submitTopology("kafka-spout", conf, builder.createTopology());

		Thread.sleep(300000);

		cluster.shutdown();
	}
}
 
Example #12
Source File: DRPC.java    From storm-benchmark with Apache License 2.0 4 votes vote down vote up
@Override
  public StormTopology getTopology(Config config) {

    Object sObj = config.get(SERVER);
    if (null == sObj) {
      throw new IllegalArgumentException("must set a drpc server");
    }
    server = (String) sObj;
    config.put(Config.DRPC_SERVERS, Lists.newArrayList(server));

    Object pObj = config.get(PORT);
    if (null == pObj) {
      throw new IllegalArgumentException("must set a drpc port");
    }
    port = Utils.getInt(pObj);
    config.put(Config.DRPC_PORT, port);

    LOG.info("drpc server: " + server + "; drpc port: " + port);

    final int spoutNum = BenchmarkUtils.getInt(config, SPOUT_NUM, DEFAULT_SPOUT_NUM);
    final int pageNum = BenchmarkUtils.getInt(config, PAGE_NUM, DEFAULT_PAGE_BOLT_NUM);
    final int viewNum = BenchmarkUtils.getInt(config, VIEW_NUM, DEFAULT_VIEW_BOLT_NUM);
    final int userNum = BenchmarkUtils.getInt(config, USER_NUM, DEFAULT_USER_BOLT_NUM);
    final int followerNum = BenchmarkUtils.getInt(config, FOLLOWER_NUM, DEFAULT_FOLLOWER_BOLT_NUM);

    spout = new TransactionalTridentKafkaSpout(
            KafkaUtils.getTridentKafkaConfig(config, new SchemeAsMultiScheme(new StringScheme())));

    TridentTopology trident = new TridentTopology();
    TridentState urlToUsers =
            trident.newStream("drpc", spout).parallelismHint(spoutNum).shuffle()
            .each(new Fields(StringScheme.STRING_SCHEME_KEY), new Extract(Arrays.asList(Item.URL, Item.USER)),
                    new Fields("url", "user")).parallelismHint(pageNum)
            .groupBy(new Fields("url"))
            .persistentAggregate(new MemoryMapState.Factory(), new Fields("url", "user"), new Distinct(), new Fields("user_set"))
            .parallelismHint(viewNum);
/** debug
 *  1. this proves that the aggregated result has successfully persisted
    urlToUsers.newValuesStream()
            .each(new Fields("url", "user_set"), new Print("(url, user_set)"), new Fields("url2", "user_set2"));
 */
    PageViewGenerator generator = new PageViewGenerator();
    TridentState userToFollowers = trident.newStaticState(new StaticSingleKeyMapState.Factory(generator.genFollowersDB()));
/** debug
  * 2. this proves that MemoryMapState could be read correctly
   trident.newStream("urlToUsers", new PageViewSpout(false))
            .each(new Fields("page_view"), new Extract(Arrays.asList(Item.URL)), new Fields("url"))
            .each(new Fields("url"), new Print("url"), new Fields("url2"))
            .groupBy(new Fields("url2"))
            .stateQuery(urlToUsers, new Fields("url2"),  new MapGet(), new Fields("users"))
            .each(new Fields("users"), new Print("users"), new Fields("users2"));
*/
/** debug
 *  3. this proves that StaticSingleKeyMapState could be read correctly
    trident.newStream("userToFollowers", new PageViewSpout(false))
            .each(new Fields("page_view"), new Extract(Arrays.asList(Item.USER)), new Fields("user"))
            .each(new Fields("user"), new Print("user"), new Fields("user2"))
            .stateQuery(userToFollowers, new Fields("user2"), new MapGet(), new Fields("followers"))
            .each(new Fields("followers"), new Print("followers"), new Fields("followers2"));
 */
    trident.newDRPCStream(FUNCTION, null)
            .each(new Fields("args"), new Print("args"), new Fields("url"))
            .groupBy(new Fields("url"))
            .stateQuery(urlToUsers, new Fields("url"), new MapGet(), new Fields("users"))
            .each(new Fields("users"), new Expand(), new Fields("user")).parallelismHint(userNum)
            .groupBy(new Fields("user"))
            .stateQuery(userToFollowers, new Fields("user"), new MapGet(), new Fields("followers"))
            .each(new Fields("followers"), new Expand(), new Fields("follower")).parallelismHint(followerNum)
            .groupBy(new Fields("follower"))
            .aggregate(new One(), new Fields("one"))
            .aggregate(new Fields("one"), new Sum(), new Fields("reach"));
    return trident.build();
  }
 
Example #13
Source File: ZkTopology.java    From yuzhouwan with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) {

        //这个地方其实就是kafka配置文件里边的zookeeper.connect这个参数,可以去那里拿过来
        String brokerZkStr = "10.100.90.201:2181/kafka_online_sample";
        String brokerZkPath = "/brokers";
        ZkHosts zkHosts = new ZkHosts(brokerZkStr, brokerZkPath);

        String topic = "mars-wap";
        //以下:将offset汇报到哪个zk集群,相应配置
        String offsetZkServers = "10.199.203.169";
        String offsetZkPort = "2181";
        List<String> zkServersList = new ArrayList<>();
        zkServersList.add(offsetZkServers);
        //汇报offset信息的root路径
        String offsetZkRoot = "/stormExample";
        //存储该spout id的消费offset信息,譬如以topoName来命名
        String offsetZkId = "storm-example";

        SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, topic, offsetZkRoot, offsetZkId);
        kafkaConfig.zkPort = Integer.parseInt(offsetZkPort);
        kafkaConfig.zkServers = zkServersList;
        kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

        KafkaSpout spout = new KafkaSpout(kafkaConfig);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", spout, 1);
        builder.setBolt("bolt", new EsBolt("storm/docs"), 1).shuffleGrouping("spout");

        Config config = new Config();
        config.put("es.index.auto.create", "true");

        if (args.length > 0) {
            try {
                StormSubmitter.submitTopology("storm-kafka-example", config, builder.createTopology());
            } catch (Exception e) {
                LOG.error("", e);
            }
        } else {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("test", config, builder.createTopology());
        }
    }
 
Example #14
Source File: KafkaWordCount.java    From storm-benchmark with Apache License 2.0 4 votes vote down vote up
@Override
public StormTopology getTopology(Config config) {
  spout = new KafkaSpout(KafkaUtils.getSpoutConfig(config, new SchemeAsMultiScheme(new StringScheme())));
  return super.getTopology(config);
}
 
Example #15
Source File: MessageJsonScheme.java    From eagle with Apache License 2.0 4 votes vote down vote up
@Override
public Fields getOutputFields() {
    return new Fields(StringScheme.STRING_SCHEME_KEY);
}
 
Example #16
Source File: HdfsTopology.java    From storm-kafka-examples with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) {
    try{
        String zkhost = "wxb-1:2181,wxb-2:2181,wxb-3:2181";
        String topic = "order";
        String groupId = "id";
        int spoutNum = 3;
        int boltNum = 1;
        ZkHosts zkHosts = new ZkHosts(zkhost);//kafaka所在的zookeeper
        SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic, "/order", groupId);  // create /order /id
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

        // HDFS bolt
        // use "|" instead of "," for field delimiter
        RecordFormat format = new DelimitedRecordFormat()
                .withFieldDelimiter("|");

        // sync the filesystem after every 1k tuples
        SyncPolicy syncPolicy = new CountSyncPolicy(1000);

        // rotate files when they reach 5MB
        FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);
        // FileRotationPolicy rotationPolicy = new TimedRotationPolicy(1.0f, TimedRotationPolicy.TimeUnit.MINUTES);

        FileNameFormat fileNameFormat = new DefaultFileNameFormat()
                .withPath("/tmp/").withPrefix("order_").withExtension(".log");

        HdfsBolt hdfsBolt = new HdfsBolt()
                .withFsUrl("hdfs://wxb-1:8020")
                .withFileNameFormat(fileNameFormat)
                .withRecordFormat(format)
                .withRotationPolicy(rotationPolicy)
                .withSyncPolicy(syncPolicy);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", kafkaSpout, spoutNum);
        builder.setBolt("check", new CheckOrderBolt(), boltNum).shuffleGrouping("spout");
        builder.setBolt("counter", new CounterBolt(),boltNum).shuffleGrouping("check");
        builder.setBolt("hdfs", hdfsBolt,boltNum).shuffleGrouping("counter");

        Config config = new Config();
        config.setDebug(true);

        if(args!=null && args.length > 0) {
            config.setNumWorkers(2);
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());
        } else {
            config.setMaxTaskParallelism(2);

            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Wordcount-Topology", config, builder.createTopology());

            Thread.sleep(500000);

            cluster.shutdown();
        }
    }catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example #17
Source File: TestUtils.java    From trident-tutorial with Apache License 2.0 4 votes vote down vote up
public static TransactionalTridentKafkaSpout testTweetSpout(BrokerHosts hosts) {
    TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(hosts, "test", "storm");
    kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    return new TransactionalTridentKafkaSpout(kafkaConfig);
}