Java Code Examples for org.apache.flume.Event#setBody()

The following examples show how to use org.apache.flume.Event#setBody(). They are drawn from open-source projects; the source file, project, and license are noted above each example.
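
Before the project examples, here is a minimal, self-contained sketch of the call: it creates a SimpleEvent, sets its body from a UTF-8 string, and attaches a header map. The class and method names are the Flume API used throughout the examples below; the header key and payload string are made up purely for illustration.

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import org.apache.flume.Event;
import org.apache.flume.event.SimpleEvent;

public class SetBodyQuickStart {
    public static void main(String[] args) {
        // Create an empty event and set its payload from a UTF-8 encoded string.
        Event event = new SimpleEvent();
        event.setBody("hello flume".getBytes(StandardCharsets.UTF_8));

        // Headers travel alongside the body and are typically used for routing and bucketing.
        Map<String, String> headers = new HashMap<String, String>();
        headers.put("origin", "quick-start"); // illustrative header key/value
        event.setHeaders(headers);

        System.out.println(new String(event.getBody(), StandardCharsets.UTF_8));
    }
}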
Example 1
Source File: GenerateSearchAnalyticsDataImpl.java    From searchanalytics-bigdata with MIT License
public Event getJsonEvent(
		final SearchQueryInstruction searchQueryInstruction)
		throws JsonProcessingException {
	final String searchQueryInstructionAsString = getObjectMapper()
			.writeValueAsString(searchQueryInstruction);
	// String writeValueAsString =
	// mapper.writerWithDefaultPrettyPrinter().writeValueAsString(searchQueryInstruction);
	searchEventsLogger.info(searchQueryInstructionAsString);
	final Event event = new JSONEvent();
	event.setBody(searchQueryInstructionAsString.getBytes());
	final Map<String, String> headers = new HashMap<String, String>();
	headers.put("eventId", searchQueryInstruction.getEventIdSuffix());
	headers.put("timestamp", searchQueryInstruction
			.getCreatedTimeStampInMillis().toString());
	if (searchQueryInstruction.getClickedDocId() != null) {
		if (searchQueryInstruction.getFavourite() != null
				&& searchQueryInstruction.getFavourite()) {
			headers.put("State", "FAVOURITE");
		} else {
			headers.put("State", "VIEWED");
		}
	}
	event.setHeaders(headers);
	return event;
}
 
Example 2
Source File: ActivityJsonToAvroInterceptor.java    From big-data-lite with MIT License
/**
 * Adds the Schema details to the event.
 * Also converts the Json body to an encoded Avro record
 */
@Override
public Event intercept(Event event) {

    Map<String, String> headers = event.getHeaders();

    // Add schema spec to header        
    if (!headers.containsKey(key))
        headers.put(key, value);
    
    try {
        // Alter the body.  Convert to Avro and encode.
        if (event.getBody().length == 0)
            return null;
        
        Activity record = getActivityRecord(event.getBody());
        
        // Encode
        outputStream.reset();
        
        datumWriter.write(record, encoder);
        encoder.flush();            
        
        // Set the event body
        event.setBody(outputStream.toByteArray());

    }
    catch (Exception e) {
        // Decode the raw bytes for logging; byte[].toString() would only print the array reference.
        logger.info("ERROR with JSON: " + new String(event.getBody()));
        return null;  // swallow event
    }
    
    return event;
}
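
The encode step above relies on a datumWriter and encoder that the interceptor holds as fields. Below is a rough, hedged sketch of that encode-and-setBody logic using Avro's generic API; the Activity schema, the "msg" field, and the AvroBodySketch helper are invented for illustration, whereas the real project uses its own generated Activity class.

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.EncoderFactory;
import org.apache.flume.Event;
import org.apache.flume.event.SimpleEvent;

public class AvroBodySketch {
    // Illustrative schema standing in for the project's Activity record.
    private static final Schema SCHEMA = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Activity\",\"fields\":["
        + "{\"name\":\"msg\",\"type\":\"string\"}]}");

    public static Event toAvroEvent(String msg) throws IOException {
        GenericRecord record = new GenericData.Record(SCHEMA);
        record.put("msg", msg);

        // Serialize the record to Avro binary, then use the bytes as the event body.
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);
        DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(SCHEMA);
        datumWriter.write(record, encoder);
        encoder.flush();

        Event event = new SimpleEvent();
        event.setBody(outputStream.toByteArray());
        return event;
    }
}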
 
Example 3
Source File: ElasticSearchJsonBodyEventSerializerTest.java    From searchanalytics-bigdata with MIT License
@Test
public void testESJsonEventSerializer() throws IOException {
	final Event event = new JSONEvent();
	final String writeValueAsString = "{\"hostedmachinename\":\"172.16.9.582\",\"pageurl\":\"http://blahblah:/1881\",\"customerid\":376,\"sessionid\":\"1eaa6cd1-0a71-4d03-aea4-d038921f5c6a\",\"querystring\":null,\"sortorder\":\"asc\",\"pagenumber\":0,\"totalhits\":39,\"hitsshown\":11,\"timestamp\":1397220014988,\"clickeddocid\":null,\"filters\":[{\"code\":\"specification_resolution\",\"value\":\"1024 x 600\"},{\"code\":\"searchfacettype_product_type_level_2\",\"value\":\"Laptops\"}]}";
	event.setBody(writeValueAsString.getBytes());
	final Map<String, String> headers = new HashMap<String, String>();
	headers.put("eventId", UUID.randomUUID().toString());
	event.setHeaders(headers);
	((XContentBuilder) esSerializer.getContentBuilder(event)).string();
}
 
Example 4
Source File: EventBuilder.java    From mt-flume with Apache License 2.0
/**
 * Instantiate an Event instance based on the provided body and headers.
 * If <code>headers</code> is <code>null</code>, then it is ignored.
 * @param body
 * @param headers
 * @return
 */
public static Event withBody(byte[] body, Map<String, String> headers) {
  Event event = new SimpleEvent();

  if(body == null) {
    body = new byte[0];
  }
  event.setBody(body);

  if (headers != null) {
    event.setHeaders(new HashMap<String, String>(headers));
  }

  return event;
}
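
A short usage sketch for the builder above; the header key, body string, and WithBodyUsage class are illustrative, and the import assumes the stock org.apache.flume.event.EventBuilder, which matches the class shown.

import java.util.Collections;
import java.util.Map;

import org.apache.flume.Event;
import org.apache.flume.event.EventBuilder;

public class WithBodyUsage {
    public static void main(String[] args) {
        Map<String, String> headers = Collections.singletonMap("source", "example"); // illustrative header
        Event event = EventBuilder.withBody("hello".getBytes(), headers);
        System.out.println(event.getHeaders()); // a defensive copy: {source=example}

        // A null body becomes an empty byte array rather than null.
        Event empty = EventBuilder.withBody((byte[]) null, headers);
        System.out.println(empty.getBody().length); // prints 0
    }
}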
 
Example 5
Source File: TestFormatSpeed.java    From mt-flume with Apache License 2.0
@Before
public void setUp(){
    events = new ArrayList<Event>();
    Event event = new SimpleEvent();
    Map<String, String> headers = new HashMap<String, String>();
    headers.put("category", "test");
    event.setHeaders(headers);
    event.setBody("".getBytes());
    for(int i = 0; i < 200000; i++){
        events.add(event);
    }
    
}
 
Example 6
Source File: TestHDFSEventSink.java    From mt-flume with Apache License 2.0
@Test
public void testCloseOnIdle() throws IOException, EventDeliveryException, InterruptedException {
  String hdfsPath = testPath + "/idleClose";

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(hdfsPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);
  Context context = new Context();
  context.put("hdfs.path", hdfsPath);
  /*
   * All three rolling methods are disabled so the only
   * way a file can roll is through the idle timeout.
   */
  context.put("hdfs.rollCount", "0");
  context.put("hdfs.rollSize", "0");
  context.put("hdfs.rollInterval", "0");
  context.put("hdfs.batchSize", "2");
  context.put("hdfs.idleTimeout", "1");
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Transaction txn = channel.getTransaction();
  txn.begin();
  for(int i=0; i < 10; i++) {
    Event event = new SimpleEvent();
    event.setBody(("test event " + i).getBytes());
    channel.put(event);
  }
  txn.commit();
  txn.close();

  sink.process();
  sink.process();
  Thread.sleep(1001);
  // previous file should have timed out now
  // this can throw an IOException(from the bucketWriter having idleClosed)
  // this is not an issue as the sink will retry and get a fresh bucketWriter
  // so long as the onIdleClose handler properly removes bucket writers that
  // were closed due to idling
  sink.process();
  sink.process();
  Thread.sleep(500); // shouldn't be enough for a timeout to occur
  sink.process();
  sink.process();
  sink.stop();
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path[] fList = FileUtil.stat2Paths(dirStat);
  Assert.assertEquals("Incorrect content of the directory " + StringUtils.join(fList, ","),
    2, fList.length);
  Assert.assertTrue(!fList[0].getName().endsWith(".tmp") &&
    !fList[1].getName().endsWith(".tmp"));
  fs.close();
}
 
Example 7
Source File: AppendIPInterceptor.java    From ehousechina with Apache License 2.0
public Event intercept(Event arg0) {
	String eventBody = new String(arg0.getBody(),Charsets.UTF_8);
	String fmt="%s - %s";
	arg0.setBody(String.format(fmt, serviceId,eventBody).getBytes());
	return arg0;
}
 
Example 8
Source File: TestHDFSEventSink.java    From mt-flume with Apache License 2.0
private void slowAppendTestHelper(long appendTimeout) throws InterruptedException,
    IOException, LifecycleException, EventDeliveryException {
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  final int numBatches = 2;
  String newPath = testPath + "/singleBucket";
  int totalEvents = 0;
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  // create HDFS sink with slow writer
  HDFSBadWriterFactory badWriterFactory = new HDFSBadWriterFactory();
  sink = new HDFSEventSink(badWriterFactory);

  Context context = new Context();
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.fileType", HDFSBadWriterFactory.BadSequenceFileType);
  context.put("hdfs.appendTimeout", String.valueOf(appendTimeout));
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();
  // push the event batches into channel
  for (i = 0; i < numBatches; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      event.getHeaders().put("slow", "1500");
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }

  sink.stop();

  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path fList[] = FileUtil.stat2Paths(dirStat);

  // check that the roll happened correctly for the given data
  // Note that we'll end up with two files with only a header
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  verifyOutputSequenceFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
 
Example 9
Source File: TestHDFSEventSink.java    From mt-flume with Apache License 2.0
@Test
public void testSlowAppendFailure() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {

  LOG.debug("Starting...");
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  final int numBatches = 2;
  String newPath = testPath + "/singleBucket";
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  // create HDFS sink with slow writer
  HDFSBadWriterFactory badWriterFactory = new HDFSBadWriterFactory();
  sink = new HDFSEventSink(badWriterFactory);

  Context context = new Context();
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.fileType", HDFSBadWriterFactory.BadSequenceFileType);
  context.put("hdfs.callTimeout", Long.toString(1000));
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();

  // push the event batches into channel
  for (i = 0; i < numBatches; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      event.getHeaders().put("slow", "1500");
      event.setBody(("Test." + i + "." + j).getBytes());
      channel.put(event);
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    Status status = sink.process();

    // verify that the append returned backoff due to timeout
    Assert.assertEquals(status, Status.BACKOFF);
  }

  sink.stop();
}
 
Example 10
Source File: TestHDFSEventSink.java    From mt-flume with Apache License 2.0
/**
 * Ensure that when a write throws an IOException we are
 * able to continue to progress in the next process() call.
 * This relies on Transactional rollback semantics for durability and
 * the behavior of the BucketWriter class of close()ing upon IOException.
 */
@Test
public void testCloseReopen() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {

  LOG.debug("Starting...");
  final int numBatches = 4;
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  String newPath = testPath + "/singleBucket";
  int i = 1, j = 1;

  HDFSBadWriterFactory badWriterFactory = new HDFSBadWriterFactory();
  sink = new HDFSEventSink(badWriterFactory);

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();

  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.fileType", HDFSBadWriterFactory.BadSequenceFileType);

  Configurables.configure(sink, context);

  MemoryChannel channel = new MemoryChannel();
  Configurables.configure(channel, new Context());

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();
  // push the event batches into channel
  for (i = 1; i < numBatches; i++) {
    channel.getTransaction().begin();
    try {
      for (j = 1; j <= batchSize; j++) {
        Event event = new SimpleEvent();
        eventDate.clear();
        eventDate.set(2011, i, i, i, 0); // yy mm dd
        event.getHeaders().put("timestamp",
            String.valueOf(eventDate.getTimeInMillis()));
        event.getHeaders().put("hostname", "Host" + i);
        String body = "Test." + i + "." + j;
        event.setBody(body.getBytes());
        bodies.add(body);
        // inject fault
        event.getHeaders().put("fault-until-reopen", "");
        channel.put(event);
      }
      channel.getTransaction().commit();
    } finally {
      channel.getTransaction().close();
    }
    LOG.info("execute sink to process the events: " + sink.process());
  }
  LOG.info("clear any events pending due to errors: " + sink.process());
  sink.stop();

  verifyOutputSequenceFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
 
Example 11
Source File: TestHDFSEventSink.java    From mt-flume with Apache License 2.0
@Test
public void testBadSimpleAppend() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {

  LOG.debug("Starting...");
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  final int numBatches = 4;
  String newPath = testPath + "/singleBucket";
  int totalEvents = 0;
  int i = 1, j = 1;

  HDFSBadWriterFactory badWriterFactory = new HDFSBadWriterFactory();
  sink = new HDFSEventSink(badWriterFactory);

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();

  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.fileType", HDFSBadWriterFactory.BadSequenceFileType);

  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();

  List<String> bodies = Lists.newArrayList();
  // push the event batches into channel
  for (i = 1; i < numBatches; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);

      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      // inject fault
      if ((totalEvents % 30) == 1) {
        event.getHeaders().put("fault-once", "");
      }
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    LOG.info("Process events: " + sink.process());
  }
  LOG.info("Process events to end of transaction max: " + sink.process());
  LOG.info("Process events to injected fault: " + sink.process());
  LOG.info("Process events remaining events: " + sink.process());
  sink.stop();
  verifyOutputSequenceFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);

}
 
Example 12
Source File: TestHDFSEventSink.java    From mt-flume with Apache License 2.0
@Test
public void testAppend() throws InterruptedException, LifecycleException,
    EventDeliveryException, IOException {

  LOG.debug("Starting...");
  final long rollCount = 3;
  final long batchSize = 2;
  final String fileName = "FlumeData";

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(testPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();

  context.put("hdfs.path", testPath + "/%Y-%m-%d/%H");
  context.put("hdfs.timeZone", "UTC");
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));

  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();
  // push the event batches into channel
  for (int i = 1; i < 4; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (int j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }

  sink.stop();
  verifyOutputSequenceFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
 
Example 13
Source File: TestHDFSEventSink.java    From mt-flume with Apache License 2.0
@Test
public void testSimpleAppendLocalTime() throws InterruptedException,
  LifecycleException, EventDeliveryException, IOException {
  final long currentTime = System.currentTimeMillis();
  Clock clk = new Clock() {
    @Override
    public long currentTimeMillis() {
      return currentTime;
    }
  };

  LOG.debug("Starting...");
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  final int numBatches = 4;
  String newPath = testPath + "/singleBucket/%s";
  String expectedPath = testPath + "/singleBucket/" +
    String.valueOf(currentTime/1000);
  int totalEvents = 0;
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(expectedPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();

  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.useLocalTimeStamp", String.valueOf(true));

  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.setBucketClock(clk);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();

  // push the event batches into channel
  for (i = 1; i < numBatches; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
        String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }

  sink.stop();

  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path fList[] = FileUtil.stat2Paths(dirStat);

  // check that the roll happened correctly for the given data
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
    Lists.newArrayList(fList), expectedFiles, fList.length);
  verifyOutputSequenceFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
  // The clock in bucketpath is static, so restore the real clock
  sink.setBucketClock(new SystemClock());
}
 
Example 14
Source File: TestHDFSEventSink.java    From mt-flume with Apache License 2.0 4 votes vote down vote up
@Test
public void testSimpleAppend() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {

  LOG.debug("Starting...");
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  final int numBatches = 4;
  String newPath = testPath + "/singleBucket";
  int totalEvents = 0;
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();

  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));

  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();

  // push the event batches into channel
  for (i = 1; i < numBatches; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }

  sink.stop();

  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path fList[] = FileUtil.stat2Paths(dirStat);

  // check that the roll happened correctly for the given data
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  verifyOutputSequenceFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
 
Example 15
Source File: TestHDFSEventSink.java    From mt-flume with Apache License 2.0
@Test
public void testAvroAppend() throws InterruptedException, LifecycleException,
    EventDeliveryException, IOException {

  LOG.debug("Starting...");
  final long rollCount = 3;
  final long batchSize = 2;
  final String fileName = "FlumeData";
  String newPath = testPath + "/singleTextBucket";
  int totalEvents = 0;
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();

  // context.put("hdfs.path", testPath + "/%Y-%m-%d/%H");
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.writeFormat", "Text");
  context.put("hdfs.fileType", "DataStream");
  context.put("serializer", "AVRO_EVENT");

  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();

  // push the event batches into channel
  for (i = 1; i < 4; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }

  sink.stop();

  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path fList[] = FileUtil.stat2Paths(dirStat);

  // check that the roll happened correctly for the given data
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  verifyOutputAvroFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
 
Example 16
Source File: TestHDFSEventSink.java    From mt-flume with Apache License 2.0
@Test
public void testTextAppend() throws InterruptedException, LifecycleException,
    EventDeliveryException, IOException {

  LOG.debug("Starting...");
  final long rollCount = 3;
  final long batchSize = 2;
  final String fileName = "FlumeData";
  String newPath = testPath + "/singleTextBucket";
  int totalEvents = 0;
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();

  // context.put("hdfs.path", testPath + "/%Y-%m-%d/%H");
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.writeFormat", "Text");
  context.put("hdfs.fileType", "DataStream");

  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();

  // push the event batches into channel
  for (i = 1; i < 4; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }

  sink.stop();

  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path fList[] = FileUtil.stat2Paths(dirStat);

  // check that the roll happened correctly for the given data
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  verifyOutputTextFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
 
Example 17
Source File: TestHDFSEventSink.java    From mt-flume with Apache License 2.0
public void doTestTextBatchAppend(boolean useRawLocalFileSystem)
    throws Exception {
  LOG.debug("Starting...");

  final long rollCount = 10;
  final long batchSize = 2;
  final String fileName = "FlumeData";
  String newPath = testPath + "/singleTextBucket";
  int totalEvents = 0;
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  Context context = new Context();

  // context.put("hdfs.path", testPath + "/%Y-%m-%d/%H");
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.rollInterval", "0");
  context.put("hdfs.rollSize", "0");
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.writeFormat", "Text");
  context.put("hdfs.useRawLocalFileSystem",
      Boolean.toString(useRawLocalFileSystem));
  context.put("hdfs.fileType", "DataStream");

  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();

  // push the event batches into channel to roll twice
  for (i = 1; i <= (rollCount*10)/batchSize; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    sink.process();
  }

  sink.stop();

  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path fList[] = FileUtil.stat2Paths(dirStat);

  // check that the roll happened correctly for the given data
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  // check the contents of the all files
  verifyOutputTextFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
 
Example 18
Source File: TestRollingFileSink.java    From mt-flume with Apache License 2.0
@Test
public void testAppend() throws InterruptedException, LifecycleException,
    EventDeliveryException, IOException {

  Context context = new Context();

  context.put("sink.directory", tmpDir.getPath());
  context.put("sink.rollInterval", "1");
  context.put("sink.batchSize", "1");

  Configurables.configure(sink, context);

  Channel channel = new PseudoTxnMemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  for (int i = 0; i < 10; i++) {
    Event event = new SimpleEvent();

    event.setBody(("Test event " + i).getBytes());

    channel.put(event);
    sink.process();

    Thread.sleep(500);
  }

  sink.stop();

  for (String file : sink.getDirectory().list()) {
    BufferedReader reader = new BufferedReader(new FileReader(new File(
        sink.getDirectory(), file)));

    String lastLine = null;
    String currentLine = null;

    while ((currentLine = reader.readLine()) != null) {
      lastLine = currentLine;
    }

    logger.debug("Produced file:{} lastLine:{}", file, lastLine);

    reader.close();
  }
}
 
Example 19
Source File: TestRollingFileSink.java    From mt-flume with Apache License 2.0
@Test
public void testAppend2() throws InterruptedException, LifecycleException,
    EventDeliveryException, IOException {

  Context context = new Context();

  context.put("sink.directory", tmpDir.getPath());
  context.put("sink.rollInterval", "0");
  context.put("sink.batchSize", "1");


  Configurables.configure(sink, context);

  Channel channel = new PseudoTxnMemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  for (int i = 0; i < 10; i++) {
    Event event = new SimpleEvent();

    event.setBody(("Test event " + i).getBytes());

    channel.put(event);
    sink.process();

    Thread.sleep(500);
  }

  sink.stop();

  for (String file : sink.getDirectory().list()) {
    BufferedReader reader = new BufferedReader(new FileReader(new File(
        sink.getDirectory(), file)));

    String lastLine = null;
    String currentLine = null;

    while ((currentLine = reader.readLine()) != null) {
      lastLine = currentLine;
      logger.debug("Produced file:{} lastLine:{}", file, lastLine);
    }


    reader.close();
  }
}