Java Code Examples for org.apache.hadoop.fs.FSDataOutputStream#writeLong()

The following examples show how to use org.apache.hadoop.fs.FSDataOutputStream#writeLong(). Each example lists its source file and originating project above the code.
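
Before the project examples, here is a minimal, self-contained sketch of the basic pattern most of them share: writeLong() follows the java.io.DataOutput contract (8 bytes, big-endian), and the values can be read back in the same order with FSDataInputStream#readLong(). The class name, file path, and values below are illustrative assumptions, not code from any of the listed projects.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteLongDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);        // local FS keeps the sketch self-contained
    Path path = new Path("/tmp/writeLong-demo");      // hypothetical path

    FSDataOutputStream out = fs.create(path, true);   // overwrite if the file already exists
    out.writeLong(42L);                               // 8 bytes, big-endian
    out.writeLong(System.currentTimeMillis());
    out.close();

    // Read the values back in the same order with FSDataInputStream#readLong()
    FSDataInputStream in = fs.open(path);
    long first = in.readLong();                       // 42L
    long second = in.readLong();
    in.close();

    fs.delete(path, false);
  }
}
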
Example 1
Source File: TestShuffleHandler.java    From hadoop with Apache License 2.0
private static void createIndexFile(File indexFile, Configuration conf)
    throws IOException {
  if (indexFile.exists()) {
    System.out.println("Deleting existing file");
    indexFile.delete();
  }
  indexFile.createNewFile();
  FSDataOutputStream output = FileSystem.getLocal(conf).getRaw().append(
      new Path(indexFile.getAbsolutePath()));
  Checksum crc = new PureJavaCrc32();
  crc.reset();
  CheckedOutputStream chk = new CheckedOutputStream(output, crc);
  String msg = "Writing new index file. This file will be used only " +
      "for the testing.";
  chk.write(Arrays.copyOf(msg.getBytes(),
      MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH));
  output.writeLong(chk.getChecksum().getValue());
  output.close();
}
 
Example 2
Source File: TestShuffleHandler.java    From big-c with Apache License 2.0
private static void createIndexFile(File indexFile, Configuration conf)
    throws IOException {
  if (indexFile.exists()) {
    System.out.println("Deleting existing file");
    indexFile.delete();
  }
  indexFile.createNewFile();
  FSDataOutputStream output = FileSystem.getLocal(conf).getRaw().append(
      new Path(indexFile.getAbsolutePath()));
  Checksum crc = new PureJavaCrc32();
  crc.reset();
  CheckedOutputStream chk = new CheckedOutputStream(output, crc);
  String msg = "Writing new index file. This file will be used only " +
      "for the testing.";
  chk.write(Arrays.copyOf(msg.getBytes(),
      MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH));
  output.writeLong(chk.getChecksum().getValue());
  output.close();
}
 
Example 3
Source File: TestIndexCache.java    From tez with Apache License 2.0
private static void writeFile(FileSystem fs, Path f, long fill, int parts)
    throws IOException {
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      dout.writeLong(fill);
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
}
 
Example 4
Source File: SpoolingRawBatchBuffer.java    From Bats with Apache License 2.0
public void writeToStream(FSDataOutputStream stream) throws IOException {
  Stopwatch watch = Stopwatch.createStarted();
  available = false;
  check = ThreadLocalRandom.current().nextLong();
  start = stream.getPos();
  logger.debug("Writing check value {} at position {}", check, start);
  stream.writeLong(check);
  batch.getHeader().writeDelimitedTo(stream);
  ByteBuf buf = batch.getBody();
  if (buf != null) {
    bodyLength = buf.capacity();
  } else {
    bodyLength = 0;
  }
  if (bodyLength > 0) {
    buf.getBytes(0, stream, bodyLength);
  }
  stream.hsync();
  FileStatus status = fs.getFileStatus(path);
  long len = status.getLen();
  logger.debug("After spooling batch, stream at position {}. File length {}", stream.getPos(), len);
  batch.sendOk();
  latch.countDown();
  long t = watch.elapsed(TimeUnit.MICROSECONDS);
  logger.debug("Took {} us to spool {} to disk. Rate {} mb/s", t, bodyLength, bodyLength / t);
  if (buf != null) {
    buf.release();
  }
}
 
Example 5
Source File: TestIndexCache.java    From tez with Apache License 2.0
@Test
public void testBadIndex() throws Exception {
  final int parts = 30;
  fs.delete(p, true);
  conf.setInt(INDEX_CACHE_MB, 1);
  IndexCache cache = new IndexCache(conf);

  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f,
      UserGroupInformation.getCurrentUser().getShortUserName());
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
 
Example 6
Source File: TestIndexCache.java    From RDFS with Apache License 2.0
private static void writeFile(FileSystem fs, Path f, long fill, int parts)
    throws IOException {
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      dout.writeLong(fill);
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
}
 
Example 7
Source File: TestIndexCache.java    From RDFS with Apache License 2.0
public void testBadIndex() throws Exception {
  final int parts = 30;
  JobConf conf = new JobConf();
  FileSystem fs = FileSystem.getLocal(conf).getRaw();
  Path p = new Path(System.getProperty("test.build.data", "/tmp"),
      "cache").makeQualified(fs);
  fs.delete(p, true);
  conf.setInt("mapred.tasktracker.indexcache.mb", 1);
  IndexCache cache = new IndexCache(conf);

  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f);
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
 
Example 8
Source File: HdfsDirectory.java    From incubator-retired-blur with Apache License 2.0
private void writeFileCache(FSDataOutputStream outputStream) throws IOException {
  Set<Entry<String, FStat>> entrySet = _cache.entrySet();
  outputStream.writeInt(_cache.size());
  for (Entry<String, FStat> e : entrySet) {
    String name = e.getKey();
    FStat fstat = e.getValue();
    writeString(outputStream, name);
    outputStream.writeLong(fstat._lastMod);
    outputStream.writeLong(fstat._length);
  }
}
 
Example 9
Source File: TestUtils.java    From succinct with Apache License 2.0
public static FSDataInputStream getStream(LongBuffer buf) throws IOException {
  File tmpDir = Files.createTempDir();
  Path filePath = new Path(tmpDir.getAbsolutePath() + "/testOut");
  FileSystem fs = FileSystem.get(filePath.toUri(), new Configuration());
  FSDataOutputStream fOut = fs.create(filePath);
  buf.rewind();
  while (buf.hasRemaining()) {
    fOut.writeLong(buf.get());
  }
  fOut.close();
  buf.rewind();
  return fs.open(filePath);
}
 
Example 10
Source File: TestIndexCache.java    From hadoop-gpu with Apache License 2.0
public void testBadIndex() throws Exception {
  final int parts = 30;
  JobConf conf = new JobConf();
  FileSystem fs = FileSystem.getLocal(conf).getRaw();
  Path p = new Path(System.getProperty("test.build.data", "/tmp"),
      "cache").makeQualified(fs);
  fs.delete(p, true);
  conf.setInt("mapred.tasktracker.indexcache.mb", 1);
  IndexCache cache = new IndexCache(conf);

  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f);
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
 
Example 11
Source File: TestIndexCache.java    From big-c with Apache License 2.0
private static void writeFile(FileSystem fs, Path f, long fill, int parts)
    throws IOException {
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      dout.writeLong(fill);
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
}
 
Example 12
Source File: TestIndexCache.java    From big-c with Apache License 2.0
public void testBadIndex() throws Exception {
  final int parts = 30;
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
  IndexCache cache = new IndexCache(conf);

  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f,
      UserGroupInformation.getCurrentUser().getShortUserName());
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
 
Example 13
Source File: TestIndexCache.java    From hadoop with Apache License 2.0
private static void writeFile(FileSystem fs, Path f, long fill, int parts)
    throws IOException {
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      dout.writeLong(fill);
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
}
 
Example 14
Source File: TestIndexCache.java    From hadoop with Apache License 2.0
public void testBadIndex() throws Exception {
  final int parts = 30;
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
  IndexCache cache = new IndexCache(conf);

  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f,
      UserGroupInformation.getCurrentUser().getShortUserName());
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
 
Example 15
Source File: TestHoodieLogFormat.java    From hudi with Apache License 2.0
@ParameterizedTest
@ValueSource(booleans = {true, false})
public void testAvroLogRecordReaderWithMixedInsertsCorruptsAndRollback(boolean readBlocksLazily)
    throws IOException, URISyntaxException, InterruptedException {

  // Write 3 data blocks with the same InstantTime (written in the same batch)
  Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
  // Set a small threshold so that every block is a new version
  Writer writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();

  // Write 1
  List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
  HoodieDataBlock dataBlock = getDataBlock(records1, header);
  writer = writer.appendBlock(dataBlock);
  writer = writer.appendBlock(dataBlock);
  writer = writer.appendBlock(dataBlock);

  writer.close();
  // Append some arbitrary bytes to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
  // create a corrupt block
  outputStream.write(HoodieLogFormat.MAGIC);
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform to the content
  outputStream.writeLong(100);
  outputStream.flush();
  outputStream.close();

  // Append some arbitrary bytes to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  outputStream = fs.append(writer.getLogFile().getPath());
  // create a corrupt block
  outputStream.write(HoodieLogFormat.MAGIC);
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform to the content
  outputStream.writeLong(100);
  outputStream.flush();
  outputStream.close();

  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();

  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Append some arbitrary bytes to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  outputStream = fs.append(writer.getLogFile().getPath());
  // create a corrupt block
  outputStream.write(HoodieLogFormat.MAGIC);
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform to the content
  outputStream.writeLong(100);
  outputStream.flush();
  outputStream.close();

  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  // Write 1 rollback block for the last commit instant
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "101");
  header.put(HeaderMetadataType.TARGET_INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
      String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
  HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
  writer = writer.appendBlock(commandBlock);
  writer.close();

  List<String> allLogFiles =
      FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
          .map(s -> s.getPath().toString()).collect(Collectors.toList());

  HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "101",
      10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
  assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
}
 
Example 16
Source File: TestHoodieLogFormat.java    From hudi with Apache License 2.0
@Test
public void testAvroLogRecordReaderWithRollbackPartialBlock()
    throws IOException, URISyntaxException, InterruptedException {
  Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
  // Set a small threshold so that every block is a new version
  Writer writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();

  // Write 1
  List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  List<IndexedRecord> copyOfRecords1 = records1.stream()
      .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
  Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
  HoodieDataBlock dataBlock = getDataBlock(records1, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Write 2
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "101");
  // Append some arbitrary bytes to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
  // create a corrupt block
  outputStream.write(HoodieLogFormat.MAGIC);
  // Write out a length that does not conform to the content
  outputStream.writeLong(1000);

  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());

  // Write out some header
  outputStream.write(HoodieLogBlock.getLogMetadataBytes(header));
  outputStream.writeLong("something-random".getBytes().length);
  outputStream.write("something-random".getBytes());
  outputStream.flush();
  outputStream.close();

  // Rollback the last write
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "102");
  header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "101");
  header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
      String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
  HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  writer = writer.appendBlock(commandBlock);

  // Write 3
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "103");
  List<IndexedRecord> records3 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  List<IndexedRecord> copyOfRecords3 = records3.stream()
      .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());

  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
  dataBlock = getDataBlock(records3, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  List<String> allLogFiles =
      FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
          .map(s -> s.getPath().toString()).collect(Collectors.toList());

  HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "103",
      10240L, true, false, bufferSize, BASE_OUTPUT_PATH);
  assertEquals(200, scanner.getTotalLogRecords(), "We would read 200 records");
  Set<String> readKeys = new HashSet<>(200);
  scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
  assertEquals(200, readKeys.size(), "Stream collect should return all 200 records");
  copyOfRecords1.addAll(copyOfRecords3);
  Set<String> originalKeys =
      copyOfRecords1.stream().map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString())
          .collect(Collectors.toSet());
  assertEquals(originalKeys, readKeys, "CompositeAvroLogReader should return 200 records from 2 versions");
}
 
Example 17
Source File: TestHoodieLogFormat.java    From hudi with Apache License 2.0
@Test
public void testAppendAndReadOnCorruptedLog() throws IOException, URISyntaxException, InterruptedException {
  Writer writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  HoodieDataBlock dataBlock = getDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Append some arbitrary bytes to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
  // create a corrupt block
  outputStream.write(HoodieLogFormat.MAGIC);
  // Write out a length that does not conform to the content
  outputStream.writeLong(474);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform to the content
  outputStream.writeLong(400);
  // Write out incomplete content
  outputStream.write("something-random".getBytes());
  outputStream.flush();
  outputStream.close();

  // Append a proper block that is of the missing length of the corrupted block
  writer =
          HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
                  .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  records = SchemaTestUtil.generateTestRecords(0, 10);
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  dataBlock = getDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // First round of reads - we should be able to read the first block and then EOF
  Reader reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema());
  assertTrue(reader.hasNext(), "First block should be available");
  reader.next();
  assertTrue(reader.hasNext(), "We should have corrupted block next");
  HoodieLogBlock block = reader.next();
  assertEquals(HoodieLogBlockType.CORRUPT_BLOCK, block.getBlockType(), "The read block should be a corrupt block");
  assertTrue(reader.hasNext(), "Third block should be available");
  reader.next();
  assertFalse(reader.hasNext(), "There should be no more block left");

  reader.close();

  // Simulate another failure back to back
  outputStream = fs.append(writer.getLogFile().getPath());
  // create a corrupt block
  outputStream.write(HoodieLogFormat.MAGIC);
  // Write out a length that does not conform to the content
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform to the content
  outputStream.writeLong(500);
  // Write out some bytes
  outputStream.write("something-else-random".getBytes());
  outputStream.flush();
  outputStream.close();

  // Should be able to append a new block
  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  records = SchemaTestUtil.generateTestRecords(0, 100);
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  dataBlock = getDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Second round of reads - we should be able to read the first and last block
  reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema());
  assertTrue(reader.hasNext(), "First block should be available");
  reader.next();
  assertTrue(reader.hasNext(), "We should get the 1st corrupted block next");
  reader.next();
  assertTrue(reader.hasNext(), "Third block should be available");
  reader.next();
  assertTrue(reader.hasNext(), "We should get the 2nd corrupted block next");
  block = reader.next();
  assertEquals(HoodieLogBlockType.CORRUPT_BLOCK, block.getBlockType(), "The read block should be a corrupt block");
  assertTrue(reader.hasNext(), "We should get the last block next");
  reader.next();
  assertFalse(reader.hasNext(), "We should have no more blocks left");
  reader.close();
}
 
Example 18
Source File: TestRecoveryHdfs.java    From lucene-solr with Apache License 2.0
@Test
public void testTruncatedLog() throws Exception {
  try {
    TestInjection.skipIndexWriterCommitOnClose = true;
    final Semaphore logReplay = new Semaphore(0);
    final Semaphore logReplayFinish = new Semaphore(0);

    UpdateLog.testing_logReplayHook = () -> {
      try {
        assertTrue(logReplay.tryAcquire(TIMEOUT, TimeUnit.SECONDS));
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    };

    UpdateLog.testing_logReplayFinishHook = () -> logReplayFinish.release();

    String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

    clearIndex();
    assertU(commit());

    assertU(adoc("id","F1"));
    assertU(adoc("id","F2"));
    assertU(adoc("id","F3"));
    
    h.close();
    

    
    String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
    Arrays.sort(files);

    FSDataOutputStream dos = fs.append(new Path(logDir, files[files.length-1]));
  
    dos.writeLong(0xffffffffffffffffL);
    dos.writeChars("This should be appended to a good log file, representing a bad partially written record.");
    dos.close();

    logReplay.release(1000);
    logReplayFinish.drainPermits();
    ignoreException("OutOfBoundsException");  // this is what the corrupted log currently produces... subject to change.
    createCore();
    assertTrue(logReplayFinish.tryAcquire(TIMEOUT, TimeUnit.SECONDS));
    resetExceptionIgnores();
    assertJQ(req("q","*:*") ,"/response/numFound==3");

    //
    // Now test that the bad log file doesn't mess up retrieving latest versions
    //

    updateJ(jsonAdd(sdoc("id","F4", "_version_","104")), params(DISTRIB_UPDATE_PARAM,FROM_LEADER));
    updateJ(jsonAdd(sdoc("id","F5", "_version_","105")), params(DISTRIB_UPDATE_PARAM,FROM_LEADER));
    updateJ(jsonAdd(sdoc("id","F6", "_version_","106")), params(DISTRIB_UPDATE_PARAM,FROM_LEADER));

    // This currently skips the bad log file and also returns the version of the clearIndex (del *:*)
    // assertJQ(req("qt","/get", "getVersions","6"), "/versions==[106,105,104]");
    assertJQ(req("qt","/get", "getVersions","3"), "/versions==[106,105,104]");

  } finally {
    UpdateLog.testing_logReplayHook = null;
    UpdateLog.testing_logReplayFinishHook = null;
  }
}
 
Example 19
Source File: SSTableIndexIndex.java    From hadoop-sstable with Apache License 2.0
/**
 * Create and write an index index based on the input Cassandra Index.db file. Read the Index.db and generate chunks
 * (splits) based on the configured chunk size.
 *
 * @param fileSystem Hadoop file system.
 * @param sstablePath SSTable Index.db.
 * @throws IOException
 */
public static void writeIndex(final FileSystem fileSystem, final Path sstablePath) throws IOException {

    final Configuration configuration = fileSystem.getConf();

    final long splitSize = configuration.getLong(HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB,
            HadoopSSTableConstants.DEFAULT_SPLIT_MB) * 1024 * 1024;

    final Closer closer = Closer.create();

    final Path outputPath = sstablePath.suffix(SSTABLE_INDEX_SUFFIX);
    final Path inProgressOutputPath = sstablePath.suffix(SSTABLE_INDEX_IN_PROGRESS_SUFFIX);

    boolean success = false;
    try {
        final FSDataOutputStream os = closer.register(fileSystem.create(inProgressOutputPath));

        final TLongArrayList splitOffsets = new TLongArrayList();
        long currentStart = 0;
        long currentEnd = 0;
        final IndexOffsetScanner index = closer.register(new IndexOffsetScanner(sstablePath, fileSystem));

        while (index.hasNext()) {
            // NOTE: This does not give an exact size of this split in bytes but a rough estimate.
            // This should be good enough since it's only used for sorting splits by size in hadoop land.
            while (currentEnd - currentStart < splitSize && index.hasNext()) {
                currentEnd = index.next();
                splitOffsets.add(currentEnd);
            }

            // Record the split
            final long[] offsets = splitOffsets.toArray();
            os.writeLong(offsets[0]); // Start
            os.writeLong(offsets[offsets.length - 1]); // End

            // Clear the offsets
            splitOffsets.clear();

            if (index.hasNext()) {
                currentStart = index.next();
                currentEnd = currentStart;
                splitOffsets.add(currentStart);
            }
        }

        success = true;
    } finally {
        closer.close();

        if (!success) {
            fileSystem.delete(inProgressOutputPath, false);
        } else {
            fileSystem.rename(inProgressOutputPath, outputPath);
        }
    }
}
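
Because the index index written above is just a flat sequence of (start, end) long pairs, reading it back only needs FSDataInputStream#readLong() until the end of the file. The following is a rough read-back sketch under that assumption; the method and variable names are hypothetical and not the project's actual reader.

// Hypothetical counterpart to writeIndex(): recover the (start, end) split pairs.
// Assumes the usual java.util, java.io.IOException, and org.apache.hadoop.fs imports.
private static List<long[]> readSplits(final FileSystem fileSystem, final Path indexIndexPath) throws IOException {
    final List<long[]> splits = new ArrayList<>();
    final long length = fileSystem.getFileStatus(indexIndexPath).getLen();
    try (FSDataInputStream in = fileSystem.open(indexIndexPath)) {
        while (in.getPos() < length) {
            final long start = in.readLong(); // split start offset
            final long end = in.readLong();   // split end offset
            splits.add(new long[] {start, end});
        }
    }
    return splits;
}
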
 
Example 20
Source File: HadoopIgfs20FileSystemAbstractSelfTest.java    From ignite with Apache License 2.0
/** @throws Exception If failed. */
@Test
public void testAppend() throws Exception {
    Path fsHome = new Path(primaryFsUri);
    Path file = new Path(fsHome, "someFile");

    int cnt = 1024;

    FSDataOutputStream out = fs.create(file, EnumSet.noneOf(CreateFlag.class),
        Options.CreateOpts.perms(FsPermission.getDefault()));

    for (int i = 0; i < cnt; i++)
        out.writeLong(i);

    out.close();

    out = fs.create(file, EnumSet.of(CreateFlag.APPEND),
        Options.CreateOpts.perms(FsPermission.getDefault()));

    for (int i = cnt; i < cnt * 2; i++)
        out.writeLong(i);

    out.close();

    FSDataInputStream in = fs.open(file, 1024);

    for (int i = 0; i < cnt * 2; i++)
        assertEquals(i, in.readLong());

    in.close();
}