Java Code Examples for org.apache.hadoop.fs.FileSystem#getStatistics()

The following examples show how to use org.apache.hadoop.fs.FileSystem#getStatistics(). They are taken from open-source projects; each example lists the source file and project it comes from.
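
Before the project-specific examples, here is a minimal, self-contained sketch of the call itself. It assumes the default FileSystem resolved from the local Hadoop configuration; the class name and printed labels are illustrative only and are not taken from any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetStatisticsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Resolve whatever FileSystem the configuration points at (e.g. file:// or hdfs://).
    FileSystem fs = FileSystem.get(conf);
    // Statistics are looked up by URI scheme and concrete FileSystem class.
    FileSystem.Statistics stats =
        FileSystem.getStatistics(fs.getUri().getScheme(), fs.getClass());
    if (stats != null) {
      System.out.println("bytes read:    " + stats.getBytesRead());
      System.out.println("bytes written: " + stats.getBytesWritten());
      System.out.println("read ops:      " + stats.getReadOps());
      System.out.println("write ops:     " + stats.getWriteOps());
    }
  }
}

The test examples below additionally call FileSystem.clearStatistics() first, which resets all registered counters so the assertions can start from zero.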
Example 1
Source File: NativeAzureFileSystemBaseTest.java    From hadoop with Apache License 2.0
@Test
public void testStatistics() throws Exception {
  FileSystem.clearStatistics();
  FileSystem.Statistics stats = FileSystem.getStatistics("wasb",
      NativeAzureFileSystem.class);
  assertEquals(0, stats.getBytesRead());
  assertEquals(0, stats.getBytesWritten());
  Path newFile = new Path("testStats");
  writeString(newFile, "12345678");
  assertEquals(8, stats.getBytesWritten());
  assertEquals(0, stats.getBytesRead());
  String readBack = readString(newFile);
  assertEquals("12345678", readBack);
  assertEquals(8, stats.getBytesRead());
  assertEquals(8, stats.getBytesWritten());
  assertTrue(fs.delete(newFile, true));
  assertEquals(8, stats.getBytesRead());
  assertEquals(8, stats.getBytesWritten());
}
 
Example 2
Source File: NativeAzureFileSystemBaseTest.java    From big-c with Apache License 2.0
@Test
public void testStatistics() throws Exception {
  FileSystem.clearStatistics();
  FileSystem.Statistics stats = FileSystem.getStatistics("wasb",
      NativeAzureFileSystem.class);
  assertEquals(0, stats.getBytesRead());
  assertEquals(0, stats.getBytesWritten());
  Path newFile = new Path("testStats");
  writeString(newFile, "12345678");
  assertEquals(8, stats.getBytesWritten());
  assertEquals(0, stats.getBytesRead());
  String readBack = readString(newFile);
  assertEquals("12345678", readBack);
  assertEquals(8, stats.getBytesRead());
  assertEquals(8, stats.getBytesWritten());
  assertTrue(fs.delete(newFile, true));
  assertEquals(8, stats.getBytesRead());
  assertEquals(8, stats.getBytesWritten());
}
 
Example 3
Source File: DFSTestUtil.java    From hadoop with Apache License 2.0
public static Statistics getStatistics(FileSystem fs) {
  return FileSystem.getStatistics(fs.getUri().getScheme(), fs.getClass());
}
 
Example 4
Source File: DFSTestUtil.java    From big-c with Apache License 2.0
public static Statistics getStatistics(FileSystem fs) {
  return FileSystem.getStatistics(fs.getUri().getScheme(), fs.getClass());
}
 
Example 5
Source File: HadoopFileSystemIntegrationHelper.java    From hadoop-connectors with Apache License 2.0
/**
 * Helper that reads text from the given file at the given offset
 * and returns it. If checkOverflow is true, it will make sure that
 * no more than 'len' bytes were read.
 */
protected String readTextFile(
    Path hadoopPath, int offset, int len, boolean checkOverflow)
    throws IOException {
  String text = null;
  FSDataInputStream readStream = null;
  long fileSystemBytesRead = 0;
  FileSystem.Statistics stats = FileSystem.getStatistics(
      ghfsFileSystemDescriptor.getScheme(), ghfs.getClass());
  if (stats != null) {
    // stats may be null if no statistics have been registered for our scheme yet.
    fileSystemBytesRead = stats.getBytesRead();
  }

  try {
    int bufferSize = len;
    bufferSize += checkOverflow ? 1 : 0;
    byte[] readBuffer = new byte[bufferSize];
    readStream =
        ghfs.open(
            hadoopPath,
            GoogleHadoopFileSystemConfiguration.GCS_INPUT_STREAM_BUFFER_SIZE.getDefault());
    int numBytesRead;
    if (offset > 0) {
      numBytesRead = readStream.read(offset, readBuffer, 0, bufferSize);
    } else {
      numBytesRead = readStream.read(readBuffer);
    }
    assertThat(numBytesRead).isEqualTo(len);
    text = new String(readBuffer, 0, numBytesRead, StandardCharsets.UTF_8);
  } finally {
    if (readStream != null) {
      readStream.close();
    }
  }

  // After the read, the stats better be non-null for our ghfs scheme.
  stats = FileSystem.getStatistics(
      ghfsFileSystemDescriptor.getScheme(), ghfs.getClass());
  assertThat(stats).isNotNull();
  long endFileSystemBytesRead = stats.getBytesRead();
  int bytesReadStats = (int) (endFileSystemBytesRead - fileSystemBytesRead);
  if (statistics == FileSystemStatistics.EXACT) {
    assertWithMessage("FS statistics mismatch fetched from class '%s'", ghfs.getClass())
        .that(bytesReadStats)
        .isEqualTo(len);
  } else if (statistics == FileSystemStatistics.GREATER_OR_EQUAL) {
    assertWithMessage("Expected %d <= %d", len, bytesReadStats)
        .that(len <= bytesReadStats)
        .isTrue();
  } else if (statistics == FileSystemStatistics.NONE) {
    assertWithMessage("FS statistics expected to be 0").that(fileSystemBytesRead).isEqualTo(0);
    assertWithMessage("FS statistics expected to be 0").that(endFileSystemBytesRead).isEqualTo(0);
  } else if (statistics == FileSystemStatistics.IGNORE) {
    // NO-OP
  }

  return text;
}
 
Example 6
Source File: HadoopFileSystemIntegrationHelper.java    From hadoop-connectors with Apache License 2.0
/**
 * Writes a file with the given buffer repeated numWrites times.
 *
 * @param hadoopPath Path of the file to create.
 * @param buffer Data to write.
 * @param numWrites Number of times to repeat the data.
 * @param overwrite If true, overwrite any existing file.
 * @return Number of bytes written.
 */
public int writeFile(Path hadoopPath, byte[] buffer, int numWrites, boolean overwrite)
    throws IOException {
  int numBytesWritten = -1;
  int totalBytesWritten = 0;

  long fileSystemBytesWritten = 0;
  FileSystem.Statistics stats = FileSystem.getStatistics(
      ghfsFileSystemDescriptor.getScheme(), ghfs.getClass());
  if (stats != null) {
    // stats may be null if no statistics have been registered for our scheme yet.
    fileSystemBytesWritten = stats.getBytesWritten();
  }
  try (FSDataOutputStream writeStream =
      ghfs.create(
          hadoopPath,
          FsPermission.getDefault(),
          overwrite,
          GoogleHadoopFileSystemConfiguration.GCS_OUTPUT_STREAM_BUFFER_SIZE.getDefault(),
          GoogleHadoopFileSystemBase.REPLICATION_FACTOR_DEFAULT,
          GoogleHadoopFileSystemConfiguration.BLOCK_SIZE.getDefault(),
          /* progress= */ null)) {
    for (int i = 0; i < numWrites; i++) {
      writeStream.write(buffer, 0, buffer.length);
      numBytesWritten = buffer.length;
      totalBytesWritten += numBytesWritten;
    }
  }

  // After the write, the stats better be non-null for our ghfs scheme.
  stats = FileSystem.getStatistics(ghfsFileSystemDescriptor.getScheme(), ghfs.getClass());
  assertThat(stats).isNotNull();
  long endFileSystemBytesWritten = stats.getBytesWritten();
  int bytesWrittenStats = (int) (endFileSystemBytesWritten - fileSystemBytesWritten);
  if (statistics == FileSystemStatistics.EXACT) {
    assertWithMessage("FS statistics mismatch fetched from class '%s'", ghfs.getClass())
        .that(bytesWrittenStats)
        .isEqualTo(totalBytesWritten);
  } else if (statistics == FileSystemStatistics.GREATER_OR_EQUAL) {
    assertWithMessage("Expected %d <= %d", totalBytesWritten, bytesWrittenStats)
        .that(totalBytesWritten <= bytesWrittenStats)
        .isTrue();
  } else if (statistics == FileSystemStatistics.NONE) {
    // Do not perform any check because stats are either not maintained or are erratic.
  } else if (statistics == FileSystemStatistics.IGNORE) {
    // NO-OP
  }

  return totalBytesWritten;
}
 
Example 7
Source File: JobFilePartitioner.java    From hraven with Apache License 2.0
@Override
public int run(String[] args) throws Exception {

  myConf = getConf();

  // Presume these are all HDFS paths, even when accessed as file://
  hdfs = FileSystem.get(myConf);

  // Grab input args and allow for -Dxyz style arguments
  String[] otherArgs = new GenericOptionsParser(myConf, args)
      .getRemainingArgs();

  // Grab the arguments we're looking for.
  CommandLine commandLine = parseArgs(otherArgs);

  // Grab the input path argument
  input = commandLine.getOptionValue("i");
  LOG.info("input=" + input);

  // Grab the output path argument
  String output = commandLine.getOptionValue("o");
  LOG.info("output=" + output);

  skipExisting = commandLine.hasOption("s");
  LOG.info("skipExisting=" + skipExisting);

  moveFiles = commandLine.hasOption("m");
  LOG.info("moveFiles=" + moveFiles);

  if (skipExisting && moveFiles) {
    throw new IllegalArgumentException(
        "Cannot use both options skipExisting and move simultaneously.");
  }

  if (commandLine.hasOption("x")) {
    try {
      maXretention = Integer.parseInt(commandLine.getOptionValue("x"));
    } catch (NumberFormatException nfe) {
      throw new IllegalArgumentException(
          "maXretention option -x is is not a valid number: "
              + commandLine.getOptionValue("x"), nfe);
    }
    // Additional check
    if (maXretention < 0) {
      throw new IllegalArgumentException(
          "Cannot retain less than 0 files. Specified maXretention option -x is: "
              + commandLine.getOptionValue("x"));
    }
    LOG.info("maXretention=" + maXretention);
    if (moveFiles) {
      throw new IllegalArgumentException(
          "Cannot use both options maXretention and move simultaneously.");
    }
  } else {
    maXretention = Integer.MAX_VALUE;
  }

  outputPath = new Path(output);
  FileStatus outputFileStatus = hdfs.getFileStatus(outputPath);

  if (!outputFileStatus.isDir()) {
    throw new IOException("Output is not a directory"
        + outputFileStatus.getPath().getName());
  }

  Path inputPath = new Path(input);
  URI inputURI = inputPath.toUri();
  String inputScheme = inputURI.getScheme();

  LOG.info("input scheme is: " + inputScheme);

  // If the input directory is in HDFS, process it as such. Assume a missing
  // scheme means HDFS.
  if ((inputScheme == null)
      || (hdfs.getUri().getScheme().equals(inputScheme))) {
    processHDFSSources(inputPath);
  } else if (inputScheme.equals("file")) {
    if (moveFiles) {
      throw new IllegalArgumentException(
          "Cannot move files that are not already in hdfs. Input is not HDFS: "
              + input);
    }
    processPlainFileSources(inputURI);
  } else {
    throw new IllegalArgumentException(
        "Cannot process files from this URI scheme: " + inputScheme);
  }

  Statistics statistics = FileSystem.getStatistics(outputPath.toUri()
      .getScheme(), hdfs.getClass());
  if (statistics != null) {
    LOG.info("HDFS bytes read: " + statistics.getBytesRead());
    LOG.info("HDFS bytes written: " + statistics.getBytesWritten());
    LOG.info("HDFS read ops: " + statistics.getReadOps());
  LOG.info("HDFS large read ops: " + statistics.getLargeReadOps());
    LOG.info("HDFS write ops: " + statistics.getWriteOps());
  }

  return 0;
}