Java Code Examples for org.apache.storm.hdfs.bolt.sync.CountSyncPolicy

The following examples show how to use org.apache.storm.hdfs.bolt.sync.CountSyncPolicy. They are extracted from open source projects.
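CountSyncPolicy implements the SyncPolicy interface from storm-hdfs: mark(tuple, offset) is called once per written tuple and returns true when a filesystem sync is due, and reset() restarts the counter after the sync. As a quick orientation before the examples, here is a minimal sketch of that contract; the class name CountSyncPolicySketch and the count of 3 are illustrative, not taken from any project below.

import org.apache.storm.hdfs.bolt.sync.CountSyncPolicy;
import org.apache.storm.hdfs.bolt.sync.SyncPolicy;

public class CountSyncPolicySketch {
    public static void main(String[] args) {
        // CountSyncPolicy starts returning true from mark() once it has been
        // called 'count' times; the HdfsBolt then syncs and calls reset().
        SyncPolicy policy = new CountSyncPolicy(3);
        for (int i = 1; i <= 6; i++) {
            // A null tuple is fine here; only the call count matters
            // (the ClonedSyncPolicyCreatorTest below does the same).
            if (policy.mark(null, i)) {
                System.out.println("sync due after tuple " + i); // fires at 3 and 6
                policy.reset();
            }
        }
    }
}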
Example 1
Project: hadooparchitecturebook   File: MovingAvgLocalTopologyRunner.java
/**
 * Create bolt which will persist ticks to HDFS.
 */
private static HdfsBolt createHdfsBolt() {

  // Use "|" instead of "," for field delimiter:
  RecordFormat format = new DelimitedRecordFormat()
    .withFieldDelimiter("|");
  // sync the filesystem after every 100 tuples:
  SyncPolicy syncPolicy = new CountSyncPolicy(100);

  // Rotate files when they reach 5MB:
  FileRotationPolicy rotationPolicy = 
    new FileSizeRotationPolicy(5.0f, Units.MB);

  // Write records to <user>/stock-ticks/ directory in HDFS:
  FileNameFormat fileNameFormat = new DefaultFileNameFormat()
    .withPath("stock-ticks/");

  HdfsBolt hdfsBolt = new HdfsBolt()
    .withFsUrl("hdfs://localhost:8020")
    .withFileNameFormat(fileNameFormat)
    .withRecordFormat(format)
    .withRotationPolicy(rotationPolicy)
    .withSyncPolicy(syncPolicy);

  return hdfsBolt;
}
 
Example 2
Project: fksm   File: KafkaTopology.java
private static HdfsBolt buildHdfsBolt(String hdfsUrl, String prefix, Fields fields) {
    // use " : " as the field delimiter and write only the given fields
    RecordFormat format = new DelimitedRecordFormat()
            .withFieldDelimiter(" : ").withFields(fields);

    // sync the filesystem after every 1k tuples
    SyncPolicy syncPolicy = new CountSyncPolicy(1000);

    // rotate files
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, Units.MB);

    FileNameFormat fileNameFormat = new DefaultFileNameFormat()
            .withPath("/storm/").withPrefix(prefix).withExtension(".seq");

    HdfsBolt hdfsBolt = new HdfsBolt()
            .withFsUrl(hdfsUrl)
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy)
            .withRetryCount(5)
            .addRotationAction(new MoveStormToLogAction().withDestination("/log"));

    return hdfsBolt;
}
 
Example 3
Project: erad2016-streamprocessing   File: SentimentAnalysisTopology.java
private static HdfsBolt createHdfsBolt() {
    // use "|" instead of "," for field delimiter
    RecordFormat format = new DelimitedRecordFormat()
            .withFieldDelimiter("|");

    // sync the filesystem after every 1k tuples
    SyncPolicy syncPolicy = new CountSyncPolicy(1000);

    // rotate files when they reach 5MB
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);

    FileNameFormat fileNameFormat = new DefaultFileNameFormat()
            .withPath(Properties.getString("sa.storm.hdfs_output_file"));

    return new HdfsBolt()
            .withFsUrl(Properties.getString("sa.storm.hdfs_url"))
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy);
}
 
Example 4
Project: storm-kafka-hdfs-example   File: HdfsBoltConfigBuilder.java
public HdfsBolt getHdfsBolt() {

    LOG.info("HDFSBOLT: Configuring the HdfsBolt");

    // Define the RecordFormat, SyncPolicy, and FileNameFormat
    RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter(fieldDelimiter);
    SyncPolicy syncPolicy = new CountSyncPolicy(syncCount);
    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath(outputLocation);

    // Configure the Bolt
    return new HdfsBolt()
            .withFsUrl(hdfsDefaultFs)
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(format)
            .withRotationPolicy(fileRotationPolicy)
            .withSyncPolicy(syncPolicy);
}
 
Example 5
Project: storm-smoke-test   File: ConnectorUtil.java
public static HdfsBolt getHdfsBolt(String fsUrl, String srcDir, String rotationDir) {
    // sync the filesystem after every tuple
    SyncPolicy syncPolicy = new CountSyncPolicy(1);

    FileNameFormat fileNameFormat = new DefaultFileNameFormat()
            .withPath(srcDir)
            .withExtension(".txt");

    RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter(",");
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(1f, FileSizeRotationPolicy.Units.KB);

    HdfsBolt bolt = new HdfsBolt()
            .withFsUrl(fsUrl)
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(format)
            .withSyncPolicy(syncPolicy)
            .withRotationPolicy(rotationPolicy)
            .addRotationAction(new MoveFileAction().toDestination(rotationDir));

    return bolt;
}
 
Example 6
Project: hadoop-arch-book   File: MovingAvgLocalTopologyRunner.java
/**
 * Create bolt which will persist ticks to HDFS.
 */
private static HdfsBolt createHdfsBolt() {

  // Use "|" instead of "," for field delimiter:
  RecordFormat format = new DelimitedRecordFormat()
    .withFieldDelimiter("|");
  // sync the filesystem after every 100 tuples:
  SyncPolicy syncPolicy = new CountSyncPolicy(100);

  // Rotate files when they reach 5MB:
  FileRotationPolicy rotationPolicy = 
    new FileSizeRotationPolicy(5.0f, Units.MB);

  // Write records to <user>/stock-ticks/ directory in HDFS:
  FileNameFormat fileNameFormat = new DefaultFileNameFormat()
    .withPath("stock-ticks/");

  HdfsBolt hdfsBolt = new HdfsBolt()
    .withFsUrl("hdfs://localhost:8020")
    .withFileNameFormat(fileNameFormat)
    .withRecordFormat(format)
    .withRotationPolicy(rotationPolicy)
    .withSyncPolicy(syncPolicy);

  return hdfsBolt;
}
 
Example 7
Project: storm-topology-examples   File: ConfigureHdfsBolt.java
public static void configureHdfsBolt(TopologyBuilder builder, 
                                     String delimiter, 
                                     String outputPath, 
                                     String hdfsUri,
                                     String hdfsBoltName, 
                                     String spoutName,
                                     int parallelismHint,
                                     FileRotationPolicy rotationPolicy,
                                     int syncCount) {
    
    LOG.info("HDFSBOLT: Configuring the HdfsBolt");
    
    // Define the RecordFormat, SyncPolicy, and FileNameFormat
    RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter(delimiter);
    SyncPolicy syncPolicy = new CountSyncPolicy(syncCount);
    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath(outputPath);
    
    // Configure the Bolt
    HdfsBolt bolt = new HdfsBolt()
            .withFsUrl(hdfsUri)
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy);
    
    // Set the Bolt
    builder.setBolt(hdfsBoltName, bolt, parallelismHint).shuffleGrouping(spoutName);

}
 
Example 8
Project: metron   File: HdfsWriter.java
@Override
public void init(Map stormConfig, TopologyContext topologyContext, WriterConfiguration configurations) {
  this.stormConfig = stormConfig;
  this.stellarProcessor = new StellarProcessor();
  this.fileNameFormat.prepare(stormConfig,topologyContext);
  if(syncPolicy != null) {
    //if the user has specified the sync policy, we don't want to override their wishes.
    syncPolicyCreator = new ClonedSyncPolicyCreator(syncPolicy);
  }
  else {
    //if the user has not, then we want to have the sync policy depend on the batch size.
    syncPolicyCreator = (source, config) -> new CountSyncPolicy(config == null ? 1 : config.getBatchSize(source));
  }
}
 
Example 9
Project: metron   File: ClonedSyncPolicyCreatorTest.java
@Test
public void testClonedPolicy() {
  CountSyncPolicy basePolicy = new CountSyncPolicy(5);
  ClonedSyncPolicyCreator creator = new ClonedSyncPolicyCreator(basePolicy);
  //ensure cloned policy continues to work and adheres to the contract: mark on 5th call.
  SyncPolicy clonedPolicy = creator.create("blah", null);
  for (int i = 0; i < 4; ++i) {
    Assert.assertFalse(clonedPolicy.mark(null, i));
  }
  Assert.assertTrue(clonedPolicy.mark(null, 5));
  //reclone policy and ensure it adheres to the original contract.
  clonedPolicy = creator.create("blah", null);
  Assert.assertFalse(clonedPolicy.mark(null, 0));
}
 
Example 10
Project: metron   File: HdfsWriterTest.java
@Test
@SuppressWarnings("unchecked")
public void testSingleFileIfNoStreamClosed() throws Exception {
  String function = "FORMAT('test-%s/%s', test.key, test.key)";
  WriterConfiguration config = buildWriterConfiguration(function);
  HdfsWriter writer = new HdfsWriter().withFileNameFormat(testFormat);
  writer.init(new HashMap<String, String>(), createTopologyContext(), config);

  JSONObject message = new JSONObject();
  message.put("test.key", "test.value");
  ArrayList<JSONObject> messages = new ArrayList<>();
  messages.add(message);
  ArrayList<Tuple> tuples = new ArrayList<>();

  CountSyncPolicy basePolicy = new CountSyncPolicy(5);
  ClonedSyncPolicyCreator creator = new ClonedSyncPolicyCreator(basePolicy);

  writer.write(SENSOR_NAME, config, tuples, messages);
  writer.write(SENSOR_NAME, config, tuples, messages);
  writer.close();

  File outputFolder = new File(folder.getAbsolutePath() + "/test-test.value/test.value/");

  // The message should show up twice, both in the same file
  ArrayList<String> expected = new ArrayList<>();
  expected.add(message.toJSONString());
  expected.add(message.toJSONString());

  // Assert both messages are in the same file, because the stream stayed open
  Assert.assertEquals(1, outputFolder.listFiles().length);
  for (File file : outputFolder.listFiles()) {
    List<String> lines = Files.readAllLines(file.toPath());
    // Two lines in the single file, one per write call
    Assert.assertEquals(2, lines.size());
    Assert.assertEquals(expected, lines);
  }
}
 
Example 11
Project: metron   File: HdfsWriterTest.java
@Test
@SuppressWarnings("unchecked")
public void testHandleAttemptsRotateIfStreamClosed() throws Exception {
  String function = "FORMAT('test-%s/%s', test.key, test.key)";
  WriterConfiguration config = buildWriterConfiguration(function);
  HdfsWriter writer = new HdfsWriter().withFileNameFormat(testFormat);
  writer.init(new HashMap<String, String>(), createTopologyContext(), config);

  JSONObject message = new JSONObject();
  message.put("test.key", "test.value");
  ArrayList<JSONObject> messages = new ArrayList<>();
  messages.add(message);
  ArrayList<Tuple> tuples = new ArrayList<>();

  CountSyncPolicy basePolicy = new CountSyncPolicy(5);
  ClonedSyncPolicyCreator creator = new ClonedSyncPolicyCreator(basePolicy);

  writer.write(SENSOR_NAME, config, tuples, messages);
  writer.getSourceHandler(SENSOR_NAME, "test-test.value/test.value", config).closeOutputFile();
  writer.getSourceHandler(SENSOR_NAME, "test-test.value/test.value", config).handle(message, SENSOR_NAME, config, creator);
  writer.close();

  File outputFolder = new File(folder.getAbsolutePath() + "/test-test.value/test.value/");

  // The message should show up twice, once in each file
  ArrayList<String> expected = new ArrayList<>();
  expected.add(message.toJSONString());

  // Assert this went into a new file because it actually rotated
  Assert.assertEquals(2, outputFolder.listFiles().length);
  for (File file : outputFolder.listFiles()) {
    List<String> lines = Files.readAllLines(file.toPath());
    // One line per file
    Assert.assertEquals(1, lines.size());
    Assert.assertEquals(expected, lines);
  }
}
 
Example 12
Project: storm-kafka-hdfs-starter   File: ConfigureHdfsBolt.java
public static void configureHdfsBolt(TopologyBuilder builder, String delimiter, String outputPath, String hdfsUri) {
    RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter(delimiter);
    SyncPolicy syncPolicy = new CountSyncPolicy(1000);
    //FileRotationPolicy rotationPolicy = new TimedRotationPolicy(300, TimedRotationPolicy.TimeUnit.SECONDS);
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(1, FileSizeRotationPolicy.Units.KB);
    FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath(outputPath);
    HdfsBolt bolt = new HdfsBolt()
            .withFsUrl(hdfsUri)
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy);
    builder.setBolt("hdfsbolt", bolt, 1).shuffleGrouping("kafkaspout");

}
 
Example 13
Project: storm-crawler   File: WARCHdfsBolt.java
public WARCHdfsBolt() {
    super();
    FileSizeRotationPolicy rotpol = new FileSizeRotationPolicy(1.0f,
            Units.GB);
    withRecordFormat(new WARCRecordFormat());
    withRotationPolicy(rotpol);
    // dummy sync policy
    withSyncPolicy(new CountSyncPolicy(10));
    // default local filesystem
    withFsUrl("file:///");
}
 
Example 14
Project: storm-kafka-examples   File: HdfsTopology.java
public static void main(String[] args) {
    try{
        String zkhost = "wxb-1:2181,wxb-2:2181,wxb-3:2181";
        String topic = "order";
        String groupId = "id";
        int spoutNum = 3;
        int boltNum = 1;
        ZkHosts zkHosts = new ZkHosts(zkhost); // the ZooKeeper ensemble used by Kafka
        SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic, "/order", groupId);  // zkRoot "/order", consumer id "id"
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

        // HDFS bolt
        // use "|" instead of "," for field delimiter
        RecordFormat format = new DelimitedRecordFormat()
                .withFieldDelimiter("|");

        // sync the filesystem after every 1k tuples
        SyncPolicy syncPolicy = new CountSyncPolicy(1000);

        // rotate files when they reach 5MB
        FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);
        // FileRotationPolicy rotationPolicy = new TimedRotationPolicy(1.0f, TimedRotationPolicy.TimeUnit.MINUTES);

        FileNameFormat fileNameFormat = new DefaultFileNameFormat()
                .withPath("/tmp/").withPrefix("order_").withExtension(".log");

        HdfsBolt hdfsBolt = new HdfsBolt()
                .withFsUrl("hdfs://wxb-1:8020")
                .withFileNameFormat(fileNameFormat)
                .withRecordFormat(format)
                .withRotationPolicy(rotationPolicy)
                .withSyncPolicy(syncPolicy);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", kafkaSpout, spoutNum);
        builder.setBolt("check", new CheckOrderBolt(), boltNum).shuffleGrouping("spout");
        builder.setBolt("counter", new CounterBolt(),boltNum).shuffleGrouping("check");
        builder.setBolt("hdfs", hdfsBolt,boltNum).shuffleGrouping("counter");

        Config config = new Config();
        config.setDebug(true);

        if (args != null && args.length > 0) {
            config.setNumWorkers(2);
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());
        } else {
            config.setMaxTaskParallelism(2);

            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Wordcount-Topology", config, builder.createTopology());

            Thread.sleep(500000);

            cluster.shutdown();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 15
Project: iot-masterclass   File: TruckEventProcessorKafkaTopology.java
public void configureHDFSBolt(TopologyBuilder builder) {
  // Use comma as the field delimiter

  String rootPath = topologyConfig.getProperty("hdfs.path");
  String prefix = topologyConfig.getProperty("hdfs.file.prefix");
  String fsUrl = topologyConfig.getProperty("hdfs.url");
  String sourceMetastoreUrl = topologyConfig.getProperty("hive.metastore.url");
  String hiveStagingTableName = topologyConfig.getProperty("hive.staging.table.name");
  String databaseName = topologyConfig.getProperty("hive.database.name");
  Float rotationTimeInMinutes = Float.valueOf(topologyConfig.getProperty("hdfs.file.rotation.time.minutes"));

  RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter(",");

  //Synchronize data buffer with the filesystem every 1000 tuples
  SyncPolicy syncPolicy = new CountSyncPolicy(1000);

  // Rotate data files when they reach five MB
  //FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, Units.MB);

  //Rotate every X minutes
  FileTimeRotationPolicy rotationPolicy = new FileTimeRotationPolicy(rotationTimeInMinutes, FileTimeRotationPolicy
      .Units.MINUTES);

  //Hive Partition Action
  HiveTablePartitionAction hivePartitionAction = new HiveTablePartitionAction(sourceMetastoreUrl,
      hiveStagingTableName, databaseName, fsUrl);

  //MoveFileAction moveFileAction = new MoveFileAction().toDestination(rootPath + "/working");

  FileNameFormat fileNameFormat = new DefaultFileNameFormat()
      .withPath(rootPath + "/staging")
      .withPrefix(prefix);

  // Instantiate the HdfsBolt
  HdfsBolt hdfsBolt = new HdfsBolt()
      .withFsUrl(fsUrl)
      .withFileNameFormat(fileNameFormat)
      .withRecordFormat(format)
      .withRotationPolicy(rotationPolicy)
      .withSyncPolicy(syncPolicy)
      .addRotationAction(hivePartitionAction);

  int hdfsBoltCount = Integer.valueOf(topologyConfig.getProperty("hdfsbolt.thread.count"));
  builder.setBolt("hdfs_bolt", hdfsBolt, hdfsBoltCount).shuffleGrouping("kafkaSpout");
}
 
Example 16
Project: iot-lab   File: TruckEventProcessorKafkaTopology.java
public void configureHDFSBolt(TopologyBuilder builder) {
  // Use comma as the field delimiter

  String rootPath = topologyConfig.getProperty("hdfs.path");
  String prefix = topologyConfig.getProperty("hdfs.file.prefix");
  String fsUrl = topologyConfig.getProperty("hdfs.url");
  String sourceMetastoreUrl = topologyConfig.getProperty("hive.metastore.url");
  String hiveStagingTableName = topologyConfig.getProperty("hive.staging.table.name");
  String databaseName = topologyConfig.getProperty("hive.database.name");
  Float rotationTimeInMinutes = Float.valueOf(topologyConfig.getProperty("hdfs.file.rotation.time.minutes"));

  RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter(",");

  //Synchronize data buffer with the filesystem every 1000 tuples
  SyncPolicy syncPolicy = new CountSyncPolicy(1000);

  // Rotate data files when they reach five MB
  //FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, Units.MB);

  //Rotate every X minutes
  FileTimeRotationPolicy rotationPolicy = new FileTimeRotationPolicy(rotationTimeInMinutes, FileTimeRotationPolicy
      .Units.MINUTES);

  //Hive Partition Action
  HiveTablePartitionAction hivePartitionAction = new HiveTablePartitionAction(sourceMetastoreUrl,
      hiveStagingTableName, databaseName, fsUrl);

  //MoveFileAction moveFileAction = new MoveFileAction().toDestination(rootPath + "/working");

  FileNameFormat fileNameFormat = new DefaultFileNameFormat()
      .withPath(rootPath + "/staging")
      .withPrefix(prefix);

  // Instantiate the HdfsBolt
  HdfsBolt hdfsBolt = new HdfsBolt()
      .withFsUrl(fsUrl)
      .withFileNameFormat(fileNameFormat)
      .withRecordFormat(format)
      .withRotationPolicy(rotationPolicy)
      .withSyncPolicy(syncPolicy)
      .addRotationAction(hivePartitionAction);

  int hdfsBoltCount = Integer.valueOf(topologyConfig.getProperty("hdfsbolt.thread.count"));
  builder.setBolt("hdfs_bolt", hdfsBolt, hdfsBoltCount).shuffleGrouping("kafkaSpout");
}
 
Example 17
Project: Big-Data-tutorial   File: WeatherTopology.java
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {

	String zkIp = "localhost";

	String nimbusHost = "sandbox.hortonworks.com";

	String zookeeperHost = zkIp + ":2181";

	ZkHosts zkHosts = new ZkHosts(zookeeperHost);
	List<String> zkServers = new ArrayList<String>();
	zkServers.add(zkIp);
	SpoutConfig kafkaConfig = new SpoutConfig(zkHosts, "spertus-weather-events", "/spertus-weather-events", "test_id");
	kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	kafkaConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
	kafkaConfig.zkServers = zkServers;
	kafkaConfig.zkRoot = "/spertus-weather-events";
	kafkaConfig.zkPort = 2181;
	kafkaConfig.forceFromStart = true;
	KafkaSpout kafkaSpout = new KafkaSpout(kafkaConfig);

	TopologyBuilder builder = new TopologyBuilder();

	HdfsBolt hdfsBolt = new HdfsBolt().withFsUrl("hdfs://sandbox.hortonworks.com:8020")
			.withFileNameFormat(new DefaultFileNameFormat().withPath("/tmp/test"))
			.withRecordFormat(new DelimitedRecordFormat().withFieldDelimiter("|"))
			.withSyncPolicy(new CountSyncPolicy(10))
			.withRotationPolicy(new FileSizeRotationPolicy(5.0f, Units.MB));
	builder.setSpout("raw-weather-events", kafkaSpout, 1);
	builder.setBolt("filter-airports", new FilterAirportsBolt(), 1).shuffleGrouping("raw-weather-events");
	//        builder.setBolt("test-bolt", new TestBolt(), 1).shuffleGrouping("raw-weather-events");
	//        builder.setBolt("hdfs-bolt", hdfsBolt, 1).shuffleGrouping("raw-weather-events");

	Map conf = new HashMap();
	conf.put(backtype.storm.Config.TOPOLOGY_WORKERS, 4);
	conf.put(backtype.storm.Config.TOPOLOGY_DEBUG, true);
	if (args != null && args.length > 0) {
		StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
	} else {
		LocalCluster cluster = new LocalCluster();
		cluster.submitTopology("weather-topology", conf, builder.createTopology());
	}
}
 
Example 18
Project: opensoc-streaming   File: TopologyRunner.java
private boolean initializeHDFSBolt(String topology_name, String name) {
	try {

		String messageUpstreamComponent = messageComponents
				.get(messageComponents.size() - 1);

		System.out.println("[OpenSOC] ------" + name
				+ " is initializing from " + messageUpstreamComponent);

		RecordFormat format = new DelimitedRecordFormat()
				.withFieldDelimiter(
						config.getString("bolt.hdfs.field.delimiter")
								.toString()).withFields(
						new Fields("message"));

		// sync the file system after every x number of tuples
		SyncPolicy syncPolicy = new CountSyncPolicy(Integer.valueOf(config
				.getString("bolt.hdfs.batch.size").toString()));

		// rotate files when they reach certain size
		FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(
				Float.valueOf(config.getString(
						"bolt.hdfs.file.rotation.size.in.mb").toString()),
				Units.MB);

		FileNameFormat fileNameFormat = new DefaultFileNameFormat()
				.withPath(config.getString("bolt.hdfs.wip.file.path")
						.toString());

		// Post rotate action
		MoveFileAction moveFileAction = (new MoveFileAction())
				.toDestination(config.getString(
						"bolt.hdfs.finished.file.path").toString());

		HdfsBolt hdfsBolt = new HdfsBolt()
				.withFsUrl(
						config.getString("bolt.hdfs.file.system.url")
								.toString())
				.withFileNameFormat(fileNameFormat)
				.withRecordFormat(format)
				.withRotationPolicy(rotationPolicy)
				.withSyncPolicy(syncPolicy)
				.addRotationAction(moveFileAction);
		if (config.getString("bolt.hdfs.compression.codec.class") != null) {
			hdfsBolt.withCompressionCodec(config.getString(
					"bolt.hdfs.compression.codec.class").toString());
		}

		builder.setBolt(name, hdfsBolt,
				config.getInt("bolt.hdfs.parallelism.hint"))
				.shuffleGrouping(messageUpstreamComponent, "message")
				.setNumTasks(config.getInt("bolt.hdfs.num.tasks"));

	} catch (Exception e) {
		e.printStackTrace();
		System.exit(0);
	}

	return true;
}
 
Example 19
Project: storm-sample   File: TruckEventProcessorKafkaTopology.java
public void configureHDFSBolt(TopologyBuilder builder) {
	// Use comma as the field delimiter

	String rootPath = topologyConfig.getProperty("hdfs.path");
	String prefix = topologyConfig.getProperty("hdfs.file.prefix");
	String fsUrl = topologyConfig.getProperty("hdfs.url");
	String sourceMetastoreUrl = topologyConfig.getProperty("hive.metastore.url");
	String hiveStagingTableName = topologyConfig.getProperty("hive.staging.table.name");
	String databaseName = topologyConfig.getProperty("hive.database.name");
	Float rotationTimeInMinutes = Float.valueOf(topologyConfig.getProperty("hdfs.file.rotation.time.minutes"));
	
	RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter(",");

	//Synchronize data buffer with the filesystem every 1000 tuples
	SyncPolicy syncPolicy = new CountSyncPolicy(1000);

	// Rotate data files when they reach five MB
	//FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, Units.MB);
	
	//Rotate every X minutes
	FileTimeRotationPolicy rotationPolicy = new FileTimeRotationPolicy(rotationTimeInMinutes, FileTimeRotationPolicy.Units.MINUTES);
	
	//Hive Partition Action
	HiveTablePartitionAction hivePartitionAction = new HiveTablePartitionAction(sourceMetastoreUrl, hiveStagingTableName, databaseName, fsUrl);
	
	//MoveFileAction moveFileAction = new MoveFileAction().toDestination(rootPath + "/working");

	FileNameFormat fileNameFormat = new DefaultFileNameFormat()
			.withPath(rootPath + "/staging")
			.withPrefix(prefix);

	// Instantiate the HdfsBolt
	HdfsBolt hdfsBolt = new HdfsBolt()
			.withFsUrl(fsUrl)
			.withFileNameFormat(fileNameFormat)
			.withRecordFormat(format)
			.withRotationPolicy(rotationPolicy)
			.withSyncPolicy(syncPolicy)
			.addRotationAction(hivePartitionAction);
	
	int hdfsBoltCount = Integer.valueOf(topologyConfig.getProperty("hdfsbolt.thread.count"));
	builder.setBolt("hdfs_bolt", hdfsBolt, hdfsBoltCount).shuffleGrouping("kafkaSpout");
}
 
Example 20
Project: storm-hdfs   File: SequenceFileTopology.java
public static void main(String[] args) throws Exception {
    Config config = new Config();
    config.setNumWorkers(1);

    SentenceSpout spout = new SentenceSpout();

    // sync the filesystem after every 1k tuples
    SyncPolicy syncPolicy = new CountSyncPolicy(1000);

    // rotate files when they reach 5MB
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, Units.MB);

    FileNameFormat fileNameFormat = new DefaultFileNameFormat()
            .withPath("/source/")
            .withExtension(".seq");

    // create sequence format instance.
    DefaultSequenceFormat format = new DefaultSequenceFormat("timestamp", "sentence");

    SequenceFileBolt bolt = new SequenceFileBolt()
            .withFsUrl(args[0])
            .withFileNameFormat(fileNameFormat)
            .withSequenceFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy)
            .withCompressionType(SequenceFile.CompressionType.RECORD)
            .withCompressionCodec("deflate")
            .addRotationAction(new MoveFileAction().toDestination("/dest/"));

    TopologyBuilder builder = new TopologyBuilder();

    builder.setSpout(SENTENCE_SPOUT_ID, spout, 1);
    // SentenceSpout --> MyBolt
    builder.setBolt(BOLT_ID, bolt, 4)
            .shuffleGrouping(SENTENCE_SPOUT_ID);

    if (args.length == 1) {
        LocalCluster cluster = new LocalCluster();

        cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
        waitForSeconds(120);
        cluster.killTopology(TOPOLOGY_NAME);
        cluster.shutdown();
        System.exit(0);
    } else if(args.length == 2) {
        StormSubmitter.submitTopology(args[1], config, builder.createTopology());
    }
}
 
Example 21
Project: storm-hdfs   File: HdfsFileTopology.java
public static void main(String[] args) throws Exception {
    Config config = new Config();
    config.setNumWorkers(1);

    SentenceSpout spout = new SentenceSpout();

    // sync the filesystem after every 1k tuples
    SyncPolicy syncPolicy = new CountSyncPolicy(1000);

    // rotate files once per minute
    FileRotationPolicy rotationPolicy = new TimedRotationPolicy(1.0f, TimedRotationPolicy.TimeUnit.MINUTES);

    FileNameFormat fileNameFormat = new DefaultFileNameFormat()
            .withPath("/foo/")
            .withExtension(".txt");



    // use "|" instead of "," for field delimiter
    RecordFormat format = new DelimitedRecordFormat()
            .withFieldDelimiter("|");

    Yaml yaml = new Yaml();
    InputStream in = new FileInputStream(args[1]);
    Map<String, Object> yamlConf = (Map<String, Object>) yaml.load(in);
    in.close();
    config.put("hdfs.config", yamlConf);

    HdfsBolt bolt = new HdfsBolt()
            .withConfigKey("hdfs.config")
            .withFsUrl(args[0])
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(format)
            .withRotationPolicy(rotationPolicy)
            .withSyncPolicy(syncPolicy)
            .addRotationAction(new MoveFileAction().toDestination("/dest2/"));

    TopologyBuilder builder = new TopologyBuilder();

    builder.setSpout(SENTENCE_SPOUT_ID, spout, 1);
    // SentenceSpout --> MyBolt
    builder.setBolt(BOLT_ID, bolt, 4)
            .shuffleGrouping(SENTENCE_SPOUT_ID);

    if (args.length == 2) {
        LocalCluster cluster = new LocalCluster();

        cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
        waitForSeconds(120);
        cluster.killTopology(TOPOLOGY_NAME);
        cluster.shutdown();
        System.exit(0);
    } else if (args.length == 3) {
        StormSubmitter.submitTopology(args[0], config, builder.createTopology());
    } else{
        System.out.println("Usage: HdfsFileTopology [topology name] <yaml config file>");
    }
}