org.apache.hadoop.tools.CopyListingFileStatus Java Examples

The following examples show how to use org.apache.hadoop.tools.CopyListingFileStatus. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CircusTrainCopyListing.java    From circus-train with Apache License 2.0 6 votes vote down vote up
@Override
public void doBuildListing(Path pathToListFile, DistCpOptions options) throws IOException {
  try (Writer writer = newWriter(pathToListFile)) {

    Path sourceRootPath = getRootPath(getConf());

    for (Path sourcePath : options.getSourcePaths()) {

      FileSystem fileSystem = sourcePath.getFileSystem(getConf());
      FileStatus directory = fileSystem.getFileStatus(sourcePath);

      Map<String, CopyListingFileStatus> children = new FileStatusTreeTraverser(fileSystem)
          .preOrderTraversal(directory)
          .transform(new CopyListingFileStatusFunction(fileSystem, options))
          .uniqueIndex(new RelativePathFunction(sourceRootPath));

      for (Entry<String, CopyListingFileStatus> entry : children.entrySet()) {
        LOG.debug("Adding '{}' with relative path '{}'", entry.getValue().getPath(), entry.getKey());
        writer.append(new Text(entry.getKey()), entry.getValue());
        writer.sync();
      }
    }
  }
}
 
Example #2
Source File: TestUniformSizeInputFormat.java    From big-c with Apache License 2.0 6 votes vote down vote up
private void checkSplits(Path listFile, List<InputSplit> splits) throws IOException {
  long lastEnd = 0;

  //Verify if each split's start is matching with the previous end and
  //we are not missing anything
  for (InputSplit split : splits) {
    FileSplit fileSplit = (FileSplit) split;
    long start = fileSplit.getStart();
    Assert.assertEquals(lastEnd, start);
    lastEnd = start + fileSplit.getLength();
  }

  //Verify there is nothing more to read from the input file
  SequenceFile.Reader reader
          = new SequenceFile.Reader(cluster.getFileSystem().getConf(),
                  SequenceFile.Reader.file(listFile));

  try {
    reader.seek(lastEnd);
    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();
    Assert.assertFalse(reader.next(srcRelPath, srcFileStatus));
  } finally {
    IOUtils.closeStream(reader);
  }
}
 
Example #3
Source File: TestCopyMapper.java    From hadoop with Apache License 2.0 6 votes vote down vote up
private void testCopyingExistingFiles(FileSystem fs, CopyMapper copyMapper,
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context) {
  try {
    for (Path path : pathList) {
      copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
              new CopyListingFileStatus(fs.getFileStatus(path)), context);
    }

    Assert.assertEquals(nFiles,
            context.getCounter(CopyMapper.Counter.SKIP).getValue());
  }
  catch (Exception exception) {
    Assert.assertTrue("Caught unexpected exception:" + exception.getMessage(),
            false);
  }
}
 
Example #4
Source File: TestUniformSizeInputFormat.java    From hadoop with Apache License 2.0 6 votes vote down vote up
private void checkSplits(Path listFile, List<InputSplit> splits) throws IOException {
  long lastEnd = 0;

  //Verify if each split's start is matching with the previous end and
  //we are not missing anything
  for (InputSplit split : splits) {
    FileSplit fileSplit = (FileSplit) split;
    long start = fileSplit.getStart();
    Assert.assertEquals(lastEnd, start);
    lastEnd = start + fileSplit.getLength();
  }

  //Verify there is nothing more to read from the input file
  SequenceFile.Reader reader
          = new SequenceFile.Reader(cluster.getFileSystem().getConf(),
                  SequenceFile.Reader.file(listFile));

  try {
    reader.seek(lastEnd);
    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();
    Assert.assertFalse(reader.next(srcRelPath, srcFileStatus));
  } finally {
    IOUtils.closeStream(reader);
  }
}
 
Example #5
Source File: TestCopyMapper.java    From big-c with Apache License 2.0 6 votes vote down vote up
private void testCopyingExistingFiles(FileSystem fs, CopyMapper copyMapper,
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context) {
  try {
    for (Path path : pathList) {
      copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
              new CopyListingFileStatus(fs.getFileStatus(path)), context);
    }

    Assert.assertEquals(nFiles,
            context.getCounter(CopyMapper.Counter.SKIP).getValue());
  }
  catch (Exception exception) {
    Assert.assertTrue("Caught unexpected exception:" + exception.getMessage(),
            false);
  }
}
 
Example #6
Source File: TestDistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreservePermissionOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.PERMISSION);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertTrue(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example #7
Source File: TestDistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveTimestampOnDirectory() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.TIMES);

  Path dst = new Path("/tmp/abc");
  Path src = new Path("/tmp/src");

  createDirectory(fs, src);
  createDirectory(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertTrue(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertTrue(srcStatus.getModificationTime() == dstStatus.getModificationTime());
}
 
Example #8
Source File: TestDistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveNothingOnDirectory() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.noneOf(FileAttribute.class);

  Path dst = new Path("/tmp/abc");
  Path src = new Path("/tmp/src");

  createDirectory(fs, src);
  createDirectory(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertTrue(dstStatus.getAccessTime() == 100);
  Assert.assertTrue(dstStatus.getModificationTime() == 100);
  Assert.assertTrue(dstStatus.getReplication() == 0);
}
 
Example #9
Source File: TestDistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveUserOnDirectory() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.USER);

  Path dst = new Path("/tmp/abc");
  Path src = new Path("/tmp/src");

  createDirectory(fs, src);
  createDirectory(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertTrue(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
}
 
Example #10
Source File: TestDistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveGroupOnDirectory() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.GROUP);

  Path dst = new Path("/tmp/abc");
  Path src = new Path("/tmp/src");

  createDirectory(fs, src);
  createDirectory(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertTrue(srcStatus.getGroup().equals(dstStatus.getGroup()));
}
 
Example #11
Source File: TestDistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreservePermissionOnDirectory() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.PERMISSION);

  Path dst = new Path("/tmp/abc");
  Path src = new Path("/tmp/src");

  createDirectory(fs, src);
  createDirectory(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertTrue(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
}
 
Example #12
Source File: TestDistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveReplicationOnDirectory() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.REPLICATION);

  Path dst = new Path("/tmp/abc");
  Path src = new Path("/tmp/src");

  createDirectory(fs, src);
  createDirectory(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  // Replication shouldn't apply to dirs so this should still be 0 == 0
  Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example #13
Source File: TestDistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveGroupOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.GROUP);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertTrue(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example #14
Source File: TestDistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveUserOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.USER);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertTrue(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example #15
Source File: TestDistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveReplicationOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.REPLICATION);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example #16
Source File: TestDistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveTimestampOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.TIMES);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertTrue(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertTrue(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example #17
Source File: MapReduceBackupCopyJob.java    From hbase with Apache License 2.0 5 votes vote down vote up
@Override
protected Path createInputFileListing(Job job) throws IOException {

  if (conf.get(NUMBER_OF_LEVELS_TO_PRESERVE_KEY) == null) {
    return super.createInputFileListing(job);
  }
  long totalBytesExpected = 0;
  int totalRecords = 0;
  Path fileListingPath = getFileListingPath();
  try (SequenceFile.Writer writer = getWriter(fileListingPath)) {
    List<Path> srcFiles = getSourceFiles();
    if (srcFiles.size() == 0) {
      return fileListingPath;
    }
    totalRecords = srcFiles.size();
    FileSystem fs = srcFiles.get(0).getFileSystem(conf);
    for (Path path : srcFiles) {
      FileStatus fst = fs.getFileStatus(path);
      totalBytesExpected += fst.getLen();
      Text key = getKey(path);
      writer.append(key, new CopyListingFileStatus(fst));
    }
    writer.close();

    // update jobs configuration

    Configuration cfg = job.getConfiguration();
    cfg.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, totalBytesExpected);
    cfg.set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, fileListingPath.toString());
    cfg.setLong(DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS, totalRecords);
  } catch (NoSuchFieldException | SecurityException | IllegalArgumentException
      | IllegalAccessException | NoSuchMethodException | ClassNotFoundException
      | InvocationTargetException e) {
    throw new IOException(e);
  }
  return fileListingPath;
}
 
Example #18
Source File: MapReduceBackupCopyJob.java    From hbase with Apache License 2.0 5 votes vote down vote up
private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
  FileSystem fs = pathToListFile.getFileSystem(conf);
  fs.delete(pathToListFile, false);
  return SequenceFile.createWriter(conf, SequenceFile.Writer.file(pathToListFile),
    SequenceFile.Writer.keyClass(Text.class),
    SequenceFile.Writer.valueClass(CopyListingFileStatus.class),
    SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
}
 
Example #19
Source File: TestDistCpUtils.java    From hadoop with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveTimestampOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.TIMES);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertTrue(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertTrue(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example #20
Source File: TestDistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveDefaults() throws IOException {
  FileSystem fs = FileSystem.get(config);
  
  // preserve replication, block size, user, group, permission, 
  // checksum type and timestamps    
  EnumSet<FileAttribute> attributes = 
      DistCpUtils.unpackAttributes(
          DistCpOptionSwitch.PRESERVE_STATUS_DEFAULT.substring(1));

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);
  
  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertTrue(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertTrue(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertTrue(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertTrue(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertTrue(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example #21
Source File: TestDistCpUtils.java    From hadoop with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveGroupOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.GROUP);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertTrue(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example #22
Source File: CircusTrainCopyListing.java    From circus-train with Apache License 2.0 5 votes vote down vote up
private Writer newWriter(Path pathToListFile) throws IOException {
  FileSystem fs = pathToListFile.getFileSystem(getConf());
  if (fs.exists(pathToListFile)) {
    fs.delete(pathToListFile, false);
  }
  return createWriter(getConf(), file(pathToListFile), keyClass(Text.class), valueClass(CopyListingFileStatus.class),
      compression(NONE));
}
 
Example #23
Source File: TestCopyMapper.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test(timeout=40000)
public void testCopyFailOnBlockSizeDifference() {
  try {

    deleteState();
    createSourceDataWithDifferentBlockSize();

    FileSystem fs = cluster.getFileSystem();
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(getConfiguration(), null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context
        = stubContext.getContext();

    Configuration configuration = context.getConfiguration();
    EnumSet<DistCpOptions.FileAttribute> fileAttributes
        = EnumSet.noneOf(DistCpOptions.FileAttribute.class);
    configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
        DistCpUtils.packAttributes(fileAttributes));

    copyMapper.setup(context);

    for (Path path : pathList) {
      final FileStatus fileStatus = fs.getFileStatus(path);
      copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
          new CopyListingFileStatus(fileStatus), context);
    }

    Assert.fail("Copy should have failed because of block-size difference.");
  }
  catch (Exception exception) {
    // Check that the exception suggests the use of -pb/-skipCrc.
    Assert.assertTrue("Failure exception should have suggested the use of -pb.", exception.getCause().getCause().getMessage().contains("pb"));
    Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.", exception.getCause().getCause().getMessage().contains("skipCrc"));
  }
}
 
Example #24
Source File: TestDistCpUtils.java    From hadoop with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveUserOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.USER);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertTrue(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example #25
Source File: TestCopyMapper.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test(timeout=40000)
public void testMakeDirFailure() {
  try {
    deleteState();
    createSourceData();

    FileSystem fs = cluster.getFileSystem();
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(getConfiguration(), null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context
            = stubContext.getContext();

    Configuration configuration = context.getConfiguration();
    String workPath = new Path("hftp://localhost:1234/*/*/*/?/")
            .makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString();
    configuration.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH,
            workPath);
    copyMapper.setup(context);

    copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), pathList.get(0))),
            new CopyListingFileStatus(fs.getFileStatus(pathList.get(0))), context);

    Assert.assertTrue("There should have been an exception.", false);
  }
  catch (Exception ignore) {
  }
}
 
Example #26
Source File: TestDistCpUtils.java    From hadoop with Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveReplicationOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.REPLICATION);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example #27
Source File: TestCopyMapper.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Test
public void testCopyWithAppend() throws Exception {
  final FileSystem fs = cluster.getFileSystem();
  // do the first distcp
  testCopy(false);
  // start appending data to source
  appendSourceData();

  // do the distcp again with -update and -append option
  CopyMapper copyMapper = new CopyMapper();
  StubContext stubContext = new StubContext(getConfiguration(), null, 0);
  Mapper<Text, CopyListingFileStatus, Text, Text>.Context context =
      stubContext.getContext();
  // Enable append 
  context.getConfiguration().setBoolean(
      DistCpOptionSwitch.APPEND.getConfigLabel(), true);
  copyMapper.setup(context);
  for (Path path: pathList) {
    copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
            new CopyListingFileStatus(cluster.getFileSystem().getFileStatus(
                path)), context);
  }

  verifyCopy(fs, false);
  // verify that we only copied new appended data
  Assert.assertEquals(nFiles * DEFAULT_FILE_SIZE * 2, stubContext
      .getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED)
      .getValue());
  Assert.assertEquals(pathList.size(), stubContext.getReporter().
      getCounter(CopyMapper.Counter.COPY).getValue());
}
 
Example #28
Source File: DistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Sort sequence file containing FileStatus and Text as key and value respecitvely
 *
 * @param fs - File System
 * @param conf - Configuration
 * @param sourceListing - Source listing file
 * @return Path of the sorted file. Is source file with _sorted appended to the name
 * @throws IOException - Any exception during sort.
 */
public static Path sortListing(FileSystem fs, Configuration conf, Path sourceListing)
    throws IOException {
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, Text.class,
    CopyListingFileStatus.class, conf);
  Path output = new Path(sourceListing.toString() +  "_sorted");

  if (fs.exists(output)) {
    fs.delete(output, false);
  }

  sorter.sort(sourceListing, output);
  return output;
}
 
Example #29
Source File: DistCpUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a FileStatus to a CopyListingFileStatus.  If preserving ACLs,
 * populates the CopyListingFileStatus with the ACLs. If preserving XAttrs,
 * populates the CopyListingFileStatus with the XAttrs.
 *
 * @param fileSystem FileSystem containing the file
 * @param fileStatus FileStatus of file
 * @param preserveAcls boolean true if preserving ACLs
 * @param preserveXAttrs boolean true if preserving XAttrs
 * @param preserveRawXAttrs boolean true if preserving raw.* XAttrs
 * @throws IOException if there is an I/O error
 */
public static CopyListingFileStatus toCopyListingFileStatus(
    FileSystem fileSystem, FileStatus fileStatus, boolean preserveAcls, 
    boolean preserveXAttrs, boolean preserveRawXAttrs) throws IOException {
  CopyListingFileStatus copyListingFileStatus =
    new CopyListingFileStatus(fileStatus);
  if (preserveAcls) {
    FsPermission perm = fileStatus.getPermission();
    if (perm.getAclBit()) {
      List<AclEntry> aclEntries = fileSystem.getAclStatus(
        fileStatus.getPath()).getEntries();
      copyListingFileStatus.setAclEntries(aclEntries);
    }
  }
  if (preserveXAttrs || preserveRawXAttrs) {
    Map<String, byte[]> srcXAttrs = fileSystem.getXAttrs(fileStatus.getPath());
    if (preserveXAttrs && preserveRawXAttrs) {
       copyListingFileStatus.setXAttrs(srcXAttrs);
    } else {
      Map<String, byte[]> trgXAttrs = Maps.newHashMap();
      final String rawNS =
          StringUtils.toLowerCase(XAttr.NameSpace.RAW.name());
      for (Map.Entry<String, byte[]> ent : srcXAttrs.entrySet()) {
        final String xattrName = ent.getKey();
        if (xattrName.startsWith(rawNS)) {
          if (preserveRawXAttrs) {
            trgXAttrs.put(xattrName, ent.getValue());
          }
        } else if (preserveXAttrs) {
          trgXAttrs.put(xattrName, ent.getValue());
        }
      }
      copyListingFileStatus.setXAttrs(trgXAttrs);
    }
  }
  return copyListingFileStatus;
}
 
Example #30
Source File: DynamicInputChunk.java    From big-c with Apache License 2.0 5 votes vote down vote up
private void openForWrite() throws IOException {
  writer = SequenceFile.createWriter(
          chunkFilePath.getFileSystem(configuration), configuration,
          chunkFilePath, Text.class, CopyListingFileStatus.class,
          SequenceFile.CompressionType.NONE);

}