Java Code Examples for org.apache.hadoop.fs.FileSystem.getFileChecksum()

The following are Java code examples showing how to use getFileChecksum() of the org.apache.hadoop.fs.FileSystem class. You can vote up the examples you like. Your votes will be used in our system to surface more good examples.
Example 1
Project: circus-train   File: PathToPathMetadata.java   Source Code and License Vote up 6 votes
/**
 * Builds the {@link PathMetadata} for {@code location}, descending into
 * directories recursively.
 *
 * A checksum is only requested when the status reports a file; children are
 * only listed when the status reports a directory.
 *
 * @param location path to describe
 * @return metadata holding the path, its modification time, its checksum
 *         ({@code null} when the path is not a file) and its children's metadata
 * @throws CircusTrainException wrapping any {@link IOException} raised while
 *         querying the file system
 */
@Override
public PathMetadata apply(@Nonnull Path location) {
  try {
    FileSystem fileSystem = location.getFileSystem(conf);
    FileStatus status = fileSystem.getFileStatus(location);
    FileChecksum checksum = status.isFile() ? fileSystem.getFileChecksum(location) : null;

    List<PathMetadata> children = new ArrayList<>();
    if (status.isDirectory()) {
      // Recurse into every entry of the directory.
      for (FileStatus child : fileSystem.listStatus(location)) {
        children.add(apply(child.getPath()));
      }
    }

    return new PathMetadata(location, status.getModificationTime(), checksum, children);
  } catch (IOException e) {
    throw new CircusTrainException("Unable to compute digest for location " + location.toString(), e);
  }
}
 
Example 2
Project: hadoop   File: BaseTestHttpFSWith.java   Source Code and License Vote up 6 votes
/**
 * Writes a one-byte file through the proxied file system and checks that the
 * checksum obtained through the HttpFS file system has the same algorithm
 * name, length and bytes. Does nothing when {@code isLocalFS()} is true.
 *
 * @throws Exception if any file system operation fails
 */
private void testChecksum() throws Exception {
  if (isLocalFS()) {
    return;
  }

  // Create the file and take the reference checksum via the proxied FS.
  FileSystem proxiedFs = FileSystem.get(getProxiedFSConf());
  proxiedFs.mkdirs(getProxiedFSTestDir());
  Path path = new Path(getProxiedFSTestDir(), "foo.txt");
  OutputStream out = proxiedFs.create(path);
  out.write(1);
  out.close();
  FileChecksum hdfsChecksum = proxiedFs.getFileChecksum(path);
  proxiedFs.close();

  // Fetch the checksum of the same path through HttpFS.
  FileSystem httpFs = getHttpFSFileSystem();
  FileChecksum httpChecksum = httpFs.getFileChecksum(path);
  httpFs.close();

  Assert.assertEquals(httpChecksum.getAlgorithmName(), hdfsChecksum.getAlgorithmName());
  Assert.assertEquals(httpChecksum.getLength(), hdfsChecksum.getLength());
  Assert.assertArrayEquals(httpChecksum.getBytes(), hdfsChecksum.getBytes());
}
 
Example 3
Project: alluxio   File: HdfsAndAlluxioUtils_update.java   Source Code and License Vote up 5 votes
/**
 * Retrieves the FileChecksum of a file.
 *
 * The file system obtained for the call is always passed to
 * closeFileSystem before returning.
 *
 * @param fileSystemInfo
 *            file system information
 * @param path
 *            file path
 * @return the FileChecksum, or null when an IOException was raised (its
 *         stack trace is printed)
 */
public static FileChecksum getFileChecksum(FileSystemInfo fileSystemInfo, String path) {
	FileSystem fileSystem = getFileSystem(fileSystemInfo);
	Path target = new Path(path);
	FileChecksum checksum = null;
	try {
		pathNotExistCheck(path, fileSystem, target);
		checksum = fileSystem.getFileChecksum(target);
	} catch (IOException e) {
		e.printStackTrace();
	} finally {
		closeFileSystem(fileSystem);
	}
	return checksum;
}
 
Example 4
Project: hadoop   File: TestEncryptedTransfer.java   Source Code and License Vote up 5 votes
/**
 * Checks that the test file keeps the same content and checksum when read
 * from a cluster rebuilt after setEncryptionConfigKeys(conf) was applied,
 * and again after that cluster's NameNode is restarted.
 *
 * @throws IOException if a cluster or file system operation fails
 */
@Test
public void testEncryptedReadAfterNameNodeRestart() throws IOException {
  MiniDFSCluster cluster = null;
  try {
    // Phase 1: write the file on a cluster built from a fresh Configuration
    // and remember its checksum as the reference value.
    Configuration conf = new Configuration();
    cluster = new MiniDFSCluster.Builder(conf).build();
    
    FileSystem fs = getFileSystem(conf);
    writeTestDataToFile(fs);
    assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
    FileChecksum checksum = fs.getFileChecksum(TEST_PATH);
    fs.close();
    cluster.shutdown();
    
    // Phase 2: rebuild the cluster with the encryption keys set on conf.
    setEncryptionConfigKeys(conf);
    
    // format(false) and the manage*Dirs(false) flags presumably make the new
    // cluster reuse the storage written in phase 1 -- TODO confirm.
    cluster = new MiniDFSCluster.Builder(conf)
        .manageDataDfsDirs(false)
        .manageNameDfsDirs(false)
        .format(false)
        .startupOption(StartupOption.REGULAR)
        .build();
    
    fs = getFileSystem(conf);
    assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
    assertEquals(checksum, fs.getFileChecksum(TEST_PATH));
    fs.close();
    
    // Phase 3: the same checks must hold with a new client after a NameNode
    // restart.
    cluster.restartNameNode();
    fs = getFileSystem(conf);
    assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
    assertEquals(checksum, fs.getFileChecksum(TEST_PATH));
    fs.close();
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 5
Project: hadoop   File: TestDFSClientRetries.java   Source Code and License Vote up 5 votes
/**
 * Checks that the checksum of a file is unchanged after the first datanode
 * holding its first block has been stopped.
 *
 * @throws Exception if a cluster or file system operation fails
 */
@Test
public void testGetFileChecksum() throws Exception {
  final String fileName = "/testGetFileChecksum";
  final Path filePath = new Path(fileName);

  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  try {
    cluster.waitActive();

    // Write the test file on the three-datanode cluster.
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L << 20, (short)3, 20100402L);

    // Reference checksum taken while every datanode is up.
    final FileChecksum before = fs.getFileChecksum(filePath);
    assertTrue(before != null);

    // Stop the first location of the first block.
    final List<LocatedBlock> blocks = DFSClient.callGetBlockLocations(
        cluster.getNameNodeRpc(), fileName, 0, Long.MAX_VALUE)
          .getLocatedBlocks();
    final DatanodeInfo firstNode = blocks.get(0).getLocations()[0];
    cluster.stopDataNode(firstNode.getXferAddr());

    // The checksum must still be obtainable and identical.
    final FileChecksum after = fs.getFileChecksum(filePath);
    assertEquals(before, after);
  } finally {
    cluster.shutdown();
  }
}
 
Example 6
Project: hadoop   File: CopyMapper.java   Source Code and License Vote up 5 votes
/**
 * Decides what to do with {@code target} given the state of {@code source}.
 *
 * @param sourceFS file system holding the source file
 * @param source status of the source file
 * @param target path of the target file
 * @return {@code OVERWRITE} when the target does not exist, when overwrite
 *         is requested, or when neither of the other actions applies;
 *         {@code SKIP} when {@code canSkip} accepts the pair; {@code APPEND}
 *         when appending is enabled, the target is shorter than the source,
 *         and the checksum of the source's first {@code targetLen} bytes
 *         is non-null and equals the target's checksum
 * @throws IOException if a file system call other than the initial
 *         target lookup's {@link FileNotFoundException} fails
 */
private FileAction checkUpdate(FileSystem sourceFS, FileStatus source,
    Path target) throws IOException {
  final FileStatus targetFileStatus;
  try {
    targetFileStatus = targetFS.getFileStatus(target);
  } catch (FileNotFoundException e) {
    return FileAction.OVERWRITE;
  }

  if (targetFileStatus == null || overWrite) {
    return FileAction.OVERWRITE;
  }
  if (canSkip(sourceFS, source, targetFileStatus)) {
    return FileAction.SKIP;
  }
  if (!append) {
    return FileAction.OVERWRITE;
  }

  long existingLen = targetFileStatus.getLen();
  if (existingLen >= source.getLen()) {
    return FileAction.OVERWRITE;
  }

  // We require that the checksum is not null. Thus currently only
  // DistributedFileSystem is supported
  FileChecksum prefixChecksum = sourceFS.getFileChecksum(
      source.getPath(), existingLen);
  boolean prefixMatches = prefixChecksum != null
      && prefixChecksum.equals(targetFS.getFileChecksum(target));
  return prefixMatches ? FileAction.APPEND : FileAction.OVERWRITE;
}
 
Example 7
Project: ditb   File: ExportSnapshot.java   Source Code and License Vote up 5 votes
/**
 * Fetches the checksum of {@code path}, treating failures as "no checksum".
 *
 * @param fs file system to query
 * @param path file whose checksum is wanted
 * @return the checksum, or {@code null} when an {@link IOException} was
 *         raised (a warning is logged)
 */
private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
  FileChecksum checksum = null;
  try {
    checksum = fs.getFileChecksum(path);
  } catch (IOException e) {
    LOG.warn("Unable to get checksum for file=" + path, e);
  }
  return checksum;
}
 
Example 8
Project: hadoop   File: TestDistributedFileSystem.java   Source Code and License Vote up 4 votes
/**
 * Creates two files with identical content but different checksum options
 * (CRC32C and CRC32) and checks that their file checksums differ and report
 * the CRC type each file was created with.
 *
 * @throws Exception if a cluster or file system operation fails
 */
@Test
public void testCreateWithCustomChecksum() throws Exception {
  Configuration conf = getTestConfiguration();
  MiniDFSCluster cluster = null;
  Path testBasePath = new Path("/test/csum");
  // create args 
  Path path1 = new Path(testBasePath, "file_wtih_crc1");
  Path path2 = new Path(testBasePath, "file_with_crc2");
  ChecksumOpt opt1 = new ChecksumOpt(DataChecksum.Type.CRC32C, 512);
  ChecksumOpt opt2 = new ChecksumOpt(DataChecksum.Type.CRC32, 512);

  // common args
  FsPermission perm = FsPermission.getDefault().applyUMask(
      FsPermission.getUMask(conf));
  EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.OVERWRITE,
      CreateFlag.CREATE);
  short repl = 1;

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    FileSystem dfs = cluster.getFileSystem();

    dfs.mkdirs(testBasePath);

    // create two files with different checksum types
    FSDataOutputStream out1 = dfs.create(path1, perm, flags, 4096, repl,
        131072L, null, opt1);
    FSDataOutputStream out2 = dfs.create(path2, perm, flags, 4096, repl,
        131072L, null, opt2);

    // write the same bytes to both files
    for (int i = 0; i < 1024; i++) {
      out1.write(i);
      out2.write(i);
    }
    out1.close();
    out2.close();

    // the two checksums must be different.
    MD5MD5CRC32FileChecksum sum1 =
        (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path1);
    MD5MD5CRC32FileChecksum sum2 =
        (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path2);
    assertFalse(sum1.equals(sum2));

    // check the individual params
    assertEquals(DataChecksum.Type.CRC32C, sum1.getCrcType());
    assertEquals(DataChecksum.Type.CRC32,  sum2.getCrcType());

  } finally {
    if (cluster != null) {
      try {
        cluster.getFileSystem().delete(testBasePath, true);
      } finally {
        // Always stop the mini cluster, even when removing the test
        // directory throws; otherwise the cluster would be leaked.
        cluster.shutdown();
      }
    }
  }
}
 
Example 9
Project: hadoop   File: TestEncryptedTransfer.java   Source Code and License Vote up 4 votes
/**
 * Checks that the test file keeps the same content and checksum when read
 * from a cluster rebuilt after setEncryptionConfigKeys(conf) was applied.
 * When resolverClazz is null, additionally asserts that the captured logs
 * of SaslDataTransferServer and DataTransferSaslUtil do NOT match the
 * cipher-suite / crypto-stream messages.
 *
 * @throws IOException if a cluster or file system operation fails
 */
@Test
public void testEncryptedRead() throws IOException {
  MiniDFSCluster cluster = null;
  try {
    // Phase 1: write the file on a cluster built from a fresh Configuration
    // and remember its checksum as the reference value.
    Configuration conf = new Configuration();
    cluster = new MiniDFSCluster.Builder(conf).build();
    
    FileSystem fs = getFileSystem(conf);
    writeTestDataToFile(fs);
    assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
    FileChecksum checksum = fs.getFileChecksum(TEST_PATH);
    fs.close();
    cluster.shutdown();
    
    // Phase 2: rebuild the cluster with the encryption keys set on conf.
    setEncryptionConfigKeys(conf);
    
    // format(false) and the manage*Dirs(false) flags presumably make the new
    // cluster reuse the storage written in phase 1 -- TODO confirm.
    cluster = new MiniDFSCluster.Builder(conf)
        .manageDataDfsDirs(false)
        .manageNameDfsDirs(false)
        .format(false)
        .startupOption(StartupOption.REGULAR)
        .build();
    
    fs = getFileSystem(conf);
    // Capture log output only while the read and checksum calls run.
    LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs(
        LogFactory.getLog(SaslDataTransferServer.class));
    LogCapturer logs1 = GenericTestUtils.LogCapturer.captureLogs(
        LogFactory.getLog(DataTransferSaslUtil.class));
    try {
      assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
      assertEquals(checksum, fs.getFileChecksum(TEST_PATH));
    } finally {
      logs.stopCapturing();
      logs1.stopCapturing();
    }
    
    fs.close();
    
    if (resolverClazz == null) {
      // Test client and server negotiate cipher option
      GenericTestUtils.assertDoesNotMatch(logs.getOutput(),
          "Server using cipher suite");
      // Check the IOStreamPair
      GenericTestUtils.assertDoesNotMatch(logs1.getOutput(),
          "Creating IOStreamPair of CryptoInputStream and CryptoOutputStream.");
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 10
Project: hadoop   File: TestEncryptedTransfer.java   Source Code and License Vote up 4 votes
/**
 * Same scenario as an encrypted read after restart, but with the data
 * encryption algorithm key explicitly set to "rc4". When resolverClazz is
 * null, additionally asserts that the captured logs of
 * SaslDataTransferServer and DataTransferSaslUtil do NOT match the
 * cipher-suite / crypto-stream messages.
 *
 * @throws IOException if a cluster or file system operation fails
 */
@Test
public void testEncryptedReadWithRC4() throws IOException {
  MiniDFSCluster cluster = null;
  try {
    // Phase 1: write the file on a cluster built from a fresh Configuration
    // and remember its checksum as the reference value.
    Configuration conf = new Configuration();
    cluster = new MiniDFSCluster.Builder(conf).build();
    
    FileSystem fs = getFileSystem(conf);
    writeTestDataToFile(fs);
    assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
    FileChecksum checksum = fs.getFileChecksum(TEST_PATH);
    fs.close();
    cluster.shutdown();
    
    // Phase 2: rebuild the cluster with the encryption keys set on conf.
    setEncryptionConfigKeys(conf);
    // It'll use 3DES by default, but we set it to rc4 here.
    conf.set(DFSConfigKeys.DFS_DATA_ENCRYPTION_ALGORITHM_KEY, "rc4");
    
    // format(false) and the manage*Dirs(false) flags presumably make the new
    // cluster reuse the storage written in phase 1 -- TODO confirm.
    cluster = new MiniDFSCluster.Builder(conf)
        .manageDataDfsDirs(false)
        .manageNameDfsDirs(false)
        .format(false)
        .startupOption(StartupOption.REGULAR)
        .build();
    
    fs = getFileSystem(conf);
    // Capture log output only while the read and checksum calls run.
    LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs(
        LogFactory.getLog(SaslDataTransferServer.class));
    LogCapturer logs1 = GenericTestUtils.LogCapturer.captureLogs(
        LogFactory.getLog(DataTransferSaslUtil.class));
    try {
      assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
      assertEquals(checksum, fs.getFileChecksum(TEST_PATH));
    } finally {
      logs.stopCapturing();
      logs1.stopCapturing();
    }

    fs.close();

    if (resolverClazz == null) {
      // Test client and server negotiate cipher option
      GenericTestUtils.assertDoesNotMatch(logs.getOutput(),
          "Server using cipher suite");
      // Check the IOStreamPair
      GenericTestUtils.assertDoesNotMatch(logs1.getOutput(),
          "Creating IOStreamPair of CryptoInputStream and CryptoOutputStream.");
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 11
Project: hadoop   File: TestEncryptedTransfer.java   Source Code and License Vote up 4 votes
/**
 * Same scenario as an encrypted read after restart, but with the transfer
 * cipher suites key set to "AES/CTR/NoPadding" from the start. When
 * resolverClazz is null, additionally asserts that the captured logs of
 * SaslDataTransferServer and DataTransferSaslUtil DO match the
 * cipher-suite / crypto-stream messages.
 *
 * @throws IOException if a cluster or file system operation fails
 */
@Test
public void testEncryptedReadWithAES() throws IOException {
  MiniDFSCluster cluster = null;
  try {
    // Phase 1: write the file on a cluster configured with the AES cipher
    // suite and remember its checksum as the reference value.
    Configuration conf = new Configuration();
    conf.set(DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_CIPHER_SUITES_KEY,
        "AES/CTR/NoPadding");
    cluster = new MiniDFSCluster.Builder(conf).build();

    FileSystem fs = getFileSystem(conf);
    writeTestDataToFile(fs);
    assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
    FileChecksum checksum = fs.getFileChecksum(TEST_PATH);
    fs.close();
    cluster.shutdown();

    // Phase 2: rebuild the cluster with the encryption keys set on conf.
    setEncryptionConfigKeys(conf);

    // format(false) and the manage*Dirs(false) flags presumably make the new
    // cluster reuse the storage written in phase 1 -- TODO confirm.
    cluster = new MiniDFSCluster.Builder(conf)
        .manageDataDfsDirs(false)
        .manageNameDfsDirs(false)
        .format(false)
        .startupOption(StartupOption.REGULAR)
        .build();

    fs = getFileSystem(conf);
    // Capture log output only while the read and checksum calls run.
    LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs(
        LogFactory.getLog(SaslDataTransferServer.class));
    LogCapturer logs1 = GenericTestUtils.LogCapturer.captureLogs(
        LogFactory.getLog(DataTransferSaslUtil.class));
    try {
      assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
      assertEquals(checksum, fs.getFileChecksum(TEST_PATH));
    } finally {
      logs.stopCapturing();
      logs1.stopCapturing();
    }

    fs.close();

    if (resolverClazz == null) {
      // Test client and server negotiate cipher option
      GenericTestUtils.assertMatches(logs.getOutput(),
          "Server using cipher suite");
      // Check the IOStreamPair
      GenericTestUtils.assertMatches(logs1.getOutput(),
          "Creating IOStreamPair of CryptoInputStream and CryptoOutputStream.");
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 12
Project: hadoop   File: TestEncryptedTransfer.java   Source Code and License Vote up 4 votes
/**
 * Checks that a single FileSystem client, opened after the cluster was
 * rebuilt with setEncryptionConfigKeys(conf) applied, can still read the
 * test file with the same content and checksum after the NameNode and
 * DataNode 0 have been restarted underneath it.
 *
 * @throws IOException if a cluster or file system operation fails
 */
@Test
public void testLongLivedReadClientAfterRestart() throws IOException {
  MiniDFSCluster cluster = null;
  try {
    // Phase 1: write the file on a cluster built from a fresh Configuration
    // and remember its checksum as the reference value.
    Configuration conf = new Configuration();
    cluster = new MiniDFSCluster.Builder(conf).build();
    
    FileSystem fs = getFileSystem(conf);
    writeTestDataToFile(fs);
    assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
    FileChecksum checksum = fs.getFileChecksum(TEST_PATH);
    fs.close();
    cluster.shutdown();
    
    // Phase 2: rebuild the cluster with the encryption keys set on conf.
    setEncryptionConfigKeys(conf);
    
    // format(false) and the manage*Dirs(false) flags presumably make the new
    // cluster reuse the storage written in phase 1 -- TODO confirm.
    cluster = new MiniDFSCluster.Builder(conf)
        .manageDataDfsDirs(false)
        .manageNameDfsDirs(false)
        .format(false)
        .startupOption(StartupOption.REGULAR)
        .build();
    
    fs = getFileSystem(conf);
    assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
    assertEquals(checksum, fs.getFileChecksum(TEST_PATH));
    
    // Restart the NN and DN, after which the client's encryption key will no
    // longer be valid.
    cluster.restartNameNode();
    assertTrue(cluster.restartDataNode(0));
    
    // The same fs instance (deliberately not reopened) must keep working.
    assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
    assertEquals(checksum, fs.getFileChecksum(TEST_PATH));
    
    fs.close();
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 13
Project: hadoop   File: TestEncryptedTransfer.java   Source Code and License Vote up 4 votes
/**
 * Checks that a single FileSystem client can still read the test file with
 * the same content and checksum after sleeping long enough for the
 * encryption keys (configured here with a 2-second update interval and
 * token lifetime) to expire.
 *
 * @throws IOException if a cluster or file system operation fails
 * @throws InterruptedException if the sleep is interrupted
 */
@Test
public void testLongLivedClient() throws IOException, InterruptedException {
  MiniDFSCluster cluster = null;
  try {
    // Phase 1: write the file on a cluster built from a fresh Configuration
    // and remember its checksum as the reference value.
    Configuration conf = new Configuration();
    cluster = new MiniDFSCluster.Builder(conf).build();
    
    FileSystem fs = getFileSystem(conf);
    writeTestDataToFile(fs);
    assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
    FileChecksum checksum = fs.getFileChecksum(TEST_PATH);
    fs.close();
    cluster.shutdown();
    
    // Phase 2: rebuild the cluster with the encryption keys set on conf.
    setEncryptionConfigKeys(conf);
    
    // format(false) and the manage*Dirs(false) flags presumably make the new
    // cluster reuse the storage written in phase 1 -- TODO confirm.
    cluster = new MiniDFSCluster.Builder(conf)
        .manageDataDfsDirs(false)
        .manageNameDfsDirs(false)
        .format(false)
        .startupOption(StartupOption.REGULAR)
        .build();
    
    // Shorten key rotation and token lifetime to 2 seconds and drop the
    // existing keys so that expiry happens within the sleep below.
    BlockTokenSecretManager btsm = cluster.getNamesystem().getBlockManager()
        .getBlockTokenSecretManager();
    btsm.setKeyUpdateIntervalForTesting(2 * 1000);
    btsm.setTokenLifetime(2 * 1000);
    btsm.clearAllKeysForTesting();
    
    fs = getFileSystem(conf);
    assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
    assertEquals(checksum, fs.getFileChecksum(TEST_PATH));
    
    // Sleep for 15 seconds, after which the encryption key will no longer be
    // valid. It needs to be a few multiples of the block token lifetime,
    // since several block tokens are valid at any given time (the current
    // and the last two, by default.)
    LOG.info("Sleeping so that encryption keys expire...");
    Thread.sleep(15 * 1000);
    LOG.info("Done sleeping.");
    
    // The same fs instance (deliberately not reopened) must keep working.
    assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH));
    assertEquals(checksum, fs.getFileChecksum(TEST_PATH));
    
    fs.close();
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 14
Project: hadoop   File: RetriableFileCopyCommand.java   Source Code and License Vote up 4 votes
/**
 * Copies the source file to {@code target}, either by appending to the
 * existing target or by writing a temporary file that is then promoted.
 *
 * After the copy, the file lengths are compared and, unless nothing was
 * read or CRC checking is skipped, the checksums are compared as well.
 *
 * @param sourceFileStatus status of the file to copy
 * @param target final destination path
 * @param context mapper context supplying the configuration
 * @param fileAttributes attributes to preserve; the source checksum is only
 *        fetched up front when CHECKSUMTYPE is among them
 * @return number of bytes read by the copy
 * @throws IOException if any file system operation or verification fails
 */
private long doCopy(FileStatus sourceFileStatus, Path target,
    Mapper.Context context, EnumSet<FileAttribute> fileAttributes)
    throws IOException {
  final boolean toAppend = action == FileAction.APPEND;
  // Appends go straight to the target; everything else goes via a tmp file.
  Path targetPath = toAppend ? target : getTmpFile(target, context);
  final Configuration configuration = context.getConfiguration();
  FileSystem targetFS = target.getFileSystem(configuration);

  try {
    final Path sourcePath = sourceFileStatus.getPath();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Copying " + sourcePath + " to " + target);
      LOG.debug("Target file path: " + targetPath);
    }
    final FileSystem sourceFS = sourcePath.getFileSystem(configuration);

    final FileChecksum sourceChecksum;
    if (fileAttributes.contains(FileAttribute.CHECKSUMTYPE)) {
      sourceChecksum = sourceFS.getFileChecksum(sourcePath);
    } else {
      sourceChecksum = null;
    }

    // When appending, resume after the bytes the target already holds.
    final long offset;
    if (toAppend) {
      offset = targetFS.getFileStatus(target).getLen();
    } else {
      offset = 0;
    }

    long bytesRead = copyToFile(targetPath, targetFS, sourceFileStatus,
        offset, context, fileAttributes, sourceChecksum);

    compareFileLengths(sourceFileStatus, targetPath, configuration,
        bytesRead + offset);
    //At this point, src&dest lengths are same. if length==0, we skip checksum
    if ((bytesRead != 0) && (!skipCrc)) {
      compareCheckSums(sourceFS, sourcePath, sourceChecksum,
          targetFS, targetPath);
    }
    // it's not append case, thus we first write to a temporary file, rename
    // it to the target path.
    if (!toAppend) {
      promoteTmpToTarget(targetPath, target, targetFS);
    }
    return bytesRead;
  } finally {
    // note that for append case, it is possible that we append partial data
    // and then fail. In that case, for the next retry, we either reuse the
    // partial appended data if it is good or we overwrite the whole file
    if (!toAppend && targetFS.exists(targetPath)) {
      targetFS.delete(targetPath, false);
    }
  }
}
 
Example 15
Project: hadoop   File: DistCpUtils.java   Source Code and License Vote up 3 votes
/**
 * Utility to compare checksums for the paths specified.
 *
 * If a checksum can't be retrieved, the comparison is treated as passing:
 * the only time it fails is when both checksums are available and they
 * don't match.
 *
 * @param sourceFS FileSystem for the source path.
 * @param source The source path.
 * @param sourceChecksum The checksum of the source file. If it is null it
 * is retrieved through sourceFS.
 * @param targetFS FileSystem for the target path.
 * @param target The target path.
 * @return true if either checksum is null (including when retrieval threw
 * an IOException, which is logged), or if both checksums are available and
 * equal; false only when both are available and differ.
 * @throws IOException declared on the signature; note that an IOException
 * raised while retrieving either checksum is caught and logged here rather
 * than propagated.
 */
public static boolean checksumsAreEqual(FileSystem sourceFS, Path source,
    FileChecksum sourceChecksum, FileSystem targetFS, Path target)
    throws IOException {
  FileChecksum targetChecksum = null;
  try {
    // Reuse the caller-supplied source checksum when there is one.
    sourceChecksum = sourceChecksum != null ? sourceChecksum : sourceFS
        .getFileChecksum(source);
    targetChecksum = targetFS.getFileChecksum(target);
  } catch (IOException e) {
    LOG.error("Unable to retrieve checksum for " + source + " or " + target, e);
  }
  // A missing checksum on either side counts as "equal" (lenient compare).
  return (sourceChecksum == null || targetChecksum == null ||
          sourceChecksum.equals(targetChecksum));
}
 
Example 16
Project: hadoop   File: FSOperations.java   Source Code and License Vote up 2 votes
/**
 * Fetches the checksum of this operation's path and converts it to JSON.
 *
 * @param fs filesystem instance to use.
 *
 * @return a Map object (JSON friendly) with the file checksum.
 *
 * @throws IOException thrown if an IO error occurred.
 */
@Override
public Map execute(FileSystem fs) throws IOException {
  return fileChecksumToJSON(fs.getFileChecksum(path));
}