Java Code Examples for org.apache.hadoop.tools.CopyListingFileStatus

The following examples show how to use org.apache.hadoop.tools.CopyListingFileStatus. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: circus-train   Source File: CircusTrainCopyListing.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public void doBuildListing(Path pathToListFile, DistCpOptions options) throws IOException {
  try (Writer writer = newWriter(pathToListFile)) {

    Path sourceRootPath = getRootPath(getConf());

    for (Path sourcePath : options.getSourcePaths()) {

      FileSystem fileSystem = sourcePath.getFileSystem(getConf());
      FileStatus directory = fileSystem.getFileStatus(sourcePath);

      Map<String, CopyListingFileStatus> children = new FileStatusTreeTraverser(fileSystem)
          .preOrderTraversal(directory)
          .transform(new CopyListingFileStatusFunction(fileSystem, options))
          .uniqueIndex(new RelativePathFunction(sourceRootPath));

      for (Entry<String, CopyListingFileStatus> entry : children.entrySet()) {
        LOG.debug("Adding '{}' with relative path '{}'", entry.getValue().getPath(), entry.getKey());
        writer.append(new Text(entry.getKey()), entry.getValue());
        writer.sync();
      }
    }
  }
}
 
Example 2
Source Project: hadoop   Source File: TestCopyMapper.java    License: Apache License 2.0 6 votes vote down vote up
private void testCopyingExistingFiles(FileSystem fs, CopyMapper copyMapper,
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context) {
  try {
    for (Path path : pathList) {
      copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
              new CopyListingFileStatus(fs.getFileStatus(path)), context);
    }

    Assert.assertEquals(nFiles,
            context.getCounter(CopyMapper.Counter.SKIP).getValue());
  }
  catch (Exception exception) {
    Assert.assertTrue("Caught unexpected exception:" + exception.getMessage(),
            false);
  }
}
 
Example 3
Source Project: hadoop   Source File: TestUniformSizeInputFormat.java    License: Apache License 2.0 6 votes vote down vote up
private void checkSplits(Path listFile, List<InputSplit> splits) throws IOException {
  long lastEnd = 0;

  //Verify if each split's start is matching with the previous end and
  //we are not missing anything
  for (InputSplit split : splits) {
    FileSplit fileSplit = (FileSplit) split;
    long start = fileSplit.getStart();
    Assert.assertEquals(lastEnd, start);
    lastEnd = start + fileSplit.getLength();
  }

  //Verify there is nothing more to read from the input file
  SequenceFile.Reader reader
          = new SequenceFile.Reader(cluster.getFileSystem().getConf(),
                  SequenceFile.Reader.file(listFile));

  try {
    reader.seek(lastEnd);
    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();
    Assert.assertFalse(reader.next(srcRelPath, srcFileStatus));
  } finally {
    IOUtils.closeStream(reader);
  }
}
 
Example 4
Source Project: big-c   Source File: TestCopyMapper.java    License: Apache License 2.0 6 votes vote down vote up
private void testCopyingExistingFiles(FileSystem fs, CopyMapper copyMapper,
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context) {
  try {
    for (Path path : pathList) {
      copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
              new CopyListingFileStatus(fs.getFileStatus(path)), context);
    }

    Assert.assertEquals(nFiles,
            context.getCounter(CopyMapper.Counter.SKIP).getValue());
  }
  catch (Exception exception) {
    Assert.assertTrue("Caught unexpected exception:" + exception.getMessage(),
            false);
  }
}
 
Example 5
Source Project: big-c   Source File: TestUniformSizeInputFormat.java    License: Apache License 2.0 6 votes vote down vote up
private void checkSplits(Path listFile, List<InputSplit> splits) throws IOException {
  long lastEnd = 0;

  //Verify if each split's start is matching with the previous end and
  //we are not missing anything
  for (InputSplit split : splits) {
    FileSplit fileSplit = (FileSplit) split;
    long start = fileSplit.getStart();
    Assert.assertEquals(lastEnd, start);
    lastEnd = start + fileSplit.getLength();
  }

  //Verify there is nothing more to read from the input file
  SequenceFile.Reader reader
          = new SequenceFile.Reader(cluster.getFileSystem().getConf(),
                  SequenceFile.Reader.file(listFile));

  try {
    reader.seek(lastEnd);
    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();
    Assert.assertFalse(reader.next(srcRelPath, srcFileStatus));
  } finally {
    IOUtils.closeStream(reader);
  }
}
 
Example 6
Source Project: circus-train   Source File: CircusTrainCopyListing.java    License: Apache License 2.0 5 votes vote down vote up
private Writer newWriter(Path pathToListFile) throws IOException {
  FileSystem fs = pathToListFile.getFileSystem(getConf());
  if (fs.exists(pathToListFile)) {
    fs.delete(pathToListFile, false);
  }
  return createWriter(getConf(), file(pathToListFile), keyClass(Text.class), valueClass(CopyListingFileStatus.class),
      compression(NONE));
}
 
Example 7
@Override
public CopyListingFileStatus apply(FileStatus fileStatus) {
  try {
    return DistCpUtils.toCopyListingFileStatus(fileSystem, fileStatus, preserveAcls, preserveXAttrs,
        preserveRawXAttrs);
  } catch (IOException e) {
    throw new CircusTrainException("Error transforming to CopyListingFileStatus: " + fileStatus, e);
  }
}
 
Example 8
Source Project: circus-train   Source File: CircusTrainCopyListingTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void typical() throws IOException {
  File input = temp.newFolder("input");
  File inputSub2 = new File(input, "sub1/sub2");
  inputSub2.mkdirs();
  Files.asCharSink(new File(inputSub2, "data"), UTF_8).write("test1");

  File listFile = temp.newFile("listFile");
  Path pathToListFile = new Path(listFile.toURI());

  List<Path> sourceDataLocations = new ArrayList<>();
  sourceDataLocations.add(new Path(inputSub2.toURI()));
  DistCpOptions options = new DistCpOptions(sourceDataLocations, new Path("dummy"));

  CircusTrainCopyListing.setRootPath(conf, new Path(input.toURI()));
  CircusTrainCopyListing copyListing = new CircusTrainCopyListing(conf, null);
  copyListing.doBuildListing(pathToListFile, options);

  try (Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(pathToListFile))) {
    Text key = new Text();
    CopyListingFileStatus value = new CopyListingFileStatus();

    assertTrue(reader.next(key, value));
    assertThat(key.toString(), is("/sub1/sub2"));
    assertThat(value.getPath().toUri().toString(), endsWith("/input/sub1/sub2"));

    assertTrue(reader.next(key, value));
    assertThat(key.toString(), is("/sub1/sub2/data"));
    assertThat(value.getPath().toUri().toString(), endsWith("/input/sub1/sub2/data"));

    assertFalse(reader.next(key, value));

  }
}
 
Example 9
Source Project: hadoop   Source File: DynamicInputChunk.java    License: Apache License 2.0 5 votes vote down vote up
private void openForWrite() throws IOException {
  writer = SequenceFile.createWriter(
          chunkFilePath.getFileSystem(configuration), configuration,
          chunkFilePath, Text.class, CopyListingFileStatus.class,
          SequenceFile.CompressionType.NONE);

}
 
Example 10
Source Project: hadoop   Source File: DistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a FileStatus to a CopyListingFileStatus.  If preserving ACLs,
 * populates the CopyListingFileStatus with the ACLs. If preserving XAttrs,
 * populates the CopyListingFileStatus with the XAttrs.
 *
 * @param fileSystem FileSystem containing the file
 * @param fileStatus FileStatus of file
 * @param preserveAcls boolean true if preserving ACLs
 * @param preserveXAttrs boolean true if preserving XAttrs
 * @param preserveRawXAttrs boolean true if preserving raw.* XAttrs
 * @throws IOException if there is an I/O error
 */
public static CopyListingFileStatus toCopyListingFileStatus(
    FileSystem fileSystem, FileStatus fileStatus, boolean preserveAcls, 
    boolean preserveXAttrs, boolean preserveRawXAttrs) throws IOException {
  CopyListingFileStatus copyListingFileStatus =
    new CopyListingFileStatus(fileStatus);
  if (preserveAcls) {
    FsPermission perm = fileStatus.getPermission();
    if (perm.getAclBit()) {
      List<AclEntry> aclEntries = fileSystem.getAclStatus(
        fileStatus.getPath()).getEntries();
      copyListingFileStatus.setAclEntries(aclEntries);
    }
  }
  if (preserveXAttrs || preserveRawXAttrs) {
    Map<String, byte[]> srcXAttrs = fileSystem.getXAttrs(fileStatus.getPath());
    if (preserveXAttrs && preserveRawXAttrs) {
       copyListingFileStatus.setXAttrs(srcXAttrs);
    } else {
      Map<String, byte[]> trgXAttrs = Maps.newHashMap();
      final String rawNS =
          StringUtils.toLowerCase(XAttr.NameSpace.RAW.name());
      for (Map.Entry<String, byte[]> ent : srcXAttrs.entrySet()) {
        final String xattrName = ent.getKey();
        if (xattrName.startsWith(rawNS)) {
          if (preserveRawXAttrs) {
            trgXAttrs.put(xattrName, ent.getValue());
          }
        } else if (preserveXAttrs) {
          trgXAttrs.put(xattrName, ent.getValue());
        }
      }
      copyListingFileStatus.setXAttrs(trgXAttrs);
    }
  }
  return copyListingFileStatus;
}
 
Example 11
Source Project: hadoop   Source File: DistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sort sequence file containing FileStatus and Text as key and value respecitvely
 *
 * @param fs - File System
 * @param conf - Configuration
 * @param sourceListing - Source listing file
 * @return Path of the sorted file. Is source file with _sorted appended to the name
 * @throws IOException - Any exception during sort.
 */
public static Path sortListing(FileSystem fs, Configuration conf, Path sourceListing)
    throws IOException {
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, Text.class,
    CopyListingFileStatus.class, conf);
  Path output = new Path(sourceListing.toString() +  "_sorted");

  if (fs.exists(output)) {
    fs.delete(output, false);
  }

  sorter.sort(sourceListing, output);
  return output;
}
 
Example 12
Source Project: hadoop   Source File: TestCopyMapper.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testCopyWithAppend() throws Exception {
  final FileSystem fs = cluster.getFileSystem();
  // do the first distcp
  testCopy(false);
  // start appending data to source
  appendSourceData();

  // do the distcp again with -update and -append option
  CopyMapper copyMapper = new CopyMapper();
  StubContext stubContext = new StubContext(getConfiguration(), null, 0);
  Mapper<Text, CopyListingFileStatus, Text, Text>.Context context =
      stubContext.getContext();
  // Enable append 
  context.getConfiguration().setBoolean(
      DistCpOptionSwitch.APPEND.getConfigLabel(), true);
  copyMapper.setup(context);
  for (Path path: pathList) {
    copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
            new CopyListingFileStatus(cluster.getFileSystem().getFileStatus(
                path)), context);
  }

  verifyCopy(fs, false);
  // verify that we only copied new appended data
  Assert.assertEquals(nFiles * DEFAULT_FILE_SIZE * 2, stubContext
      .getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED)
      .getValue());
  Assert.assertEquals(pathList.size(), stubContext.getReporter().
      getCounter(CopyMapper.Counter.COPY).getValue());
}
 
Example 13
Source Project: hadoop   Source File: TestCopyMapper.java    License: Apache License 2.0 5 votes vote down vote up
@Test(timeout=40000)
public void testMakeDirFailure() {
  try {
    deleteState();
    createSourceData();

    FileSystem fs = cluster.getFileSystem();
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(getConfiguration(), null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context
            = stubContext.getContext();

    Configuration configuration = context.getConfiguration();
    String workPath = new Path("hftp://localhost:1234/*/*/*/?/")
            .makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString();
    configuration.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH,
            workPath);
    copyMapper.setup(context);

    copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), pathList.get(0))),
            new CopyListingFileStatus(fs.getFileStatus(pathList.get(0))), context);

    Assert.assertTrue("There should have been an exception.", false);
  }
  catch (Exception ignore) {
  }
}
 
Example 14
Source Project: hadoop   Source File: TestCopyMapper.java    License: Apache License 2.0 5 votes vote down vote up
@Test(timeout=40000)
public void testCopyFailOnBlockSizeDifference() {
  try {

    deleteState();
    createSourceDataWithDifferentBlockSize();

    FileSystem fs = cluster.getFileSystem();
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(getConfiguration(), null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context
        = stubContext.getContext();

    Configuration configuration = context.getConfiguration();
    EnumSet<DistCpOptions.FileAttribute> fileAttributes
        = EnumSet.noneOf(DistCpOptions.FileAttribute.class);
    configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
        DistCpUtils.packAttributes(fileAttributes));

    copyMapper.setup(context);

    for (Path path : pathList) {
      final FileStatus fileStatus = fs.getFileStatus(path);
      copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
          new CopyListingFileStatus(fileStatus), context);
    }

    Assert.fail("Copy should have failed because of block-size difference.");
  }
  catch (Exception exception) {
    // Check that the exception suggests the use of -pb/-skipCrc.
    Assert.assertTrue("Failure exception should have suggested the use of -pb.", exception.getCause().getCause().getMessage().contains("pb"));
    Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.", exception.getCause().getCause().getMessage().contains("skipCrc"));
  }
}
 
Example 15
Source Project: hadoop   Source File: TestDistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveDefaults() throws IOException {
  FileSystem fs = FileSystem.get(config);
  
  // preserve replication, block size, user, group, permission, 
  // checksum type and timestamps    
  EnumSet<FileAttribute> attributes = 
      DistCpUtils.unpackAttributes(
          DistCpOptionSwitch.PRESERVE_STATUS_DEFAULT.substring(1));

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);
  
  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertTrue(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertTrue(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertTrue(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertTrue(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertTrue(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example 16
Source Project: hadoop   Source File: TestDistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveNothingOnDirectory() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.noneOf(FileAttribute.class);

  Path dst = new Path("/tmp/abc");
  Path src = new Path("/tmp/src");

  createDirectory(fs, src);
  createDirectory(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertTrue(dstStatus.getAccessTime() == 100);
  Assert.assertTrue(dstStatus.getModificationTime() == 100);
  Assert.assertTrue(dstStatus.getReplication() == 0);
}
 
Example 17
Source Project: hadoop   Source File: TestDistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testPreservePermissionOnDirectory() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.PERMISSION);

  Path dst = new Path("/tmp/abc");
  Path src = new Path("/tmp/src");

  createDirectory(fs, src);
  createDirectory(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertTrue(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
}
 
Example 18
Source Project: hadoop   Source File: TestDistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveGroupOnDirectory() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.GROUP);

  Path dst = new Path("/tmp/abc");
  Path src = new Path("/tmp/src");

  createDirectory(fs, src);
  createDirectory(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertTrue(srcStatus.getGroup().equals(dstStatus.getGroup()));
}
 
Example 19
Source Project: hadoop   Source File: TestDistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveUserOnDirectory() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.USER);

  Path dst = new Path("/tmp/abc");
  Path src = new Path("/tmp/src");

  createDirectory(fs, src);
  createDirectory(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertTrue(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
}
 
Example 20
Source Project: hadoop   Source File: TestDistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveReplicationOnDirectory() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.REPLICATION);

  Path dst = new Path("/tmp/abc");
  Path src = new Path("/tmp/src");

  createDirectory(fs, src);
  createDirectory(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  // Replication shouldn't apply to dirs so this should still be 0 == 0
  Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example 21
Source Project: hadoop   Source File: TestDistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveTimestampOnDirectory() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.TIMES);

  Path dst = new Path("/tmp/abc");
  Path src = new Path("/tmp/src");

  createDirectory(fs, src);
  createDirectory(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertTrue(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertTrue(srcStatus.getModificationTime() == dstStatus.getModificationTime());
}
 
Example 22
Source Project: hadoop   Source File: TestDistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveNothingOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.noneOf(FileAttribute.class);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example 23
Source Project: hadoop   Source File: TestDistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testPreservePermissionOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.PERMISSION);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertTrue(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example 24
Source Project: hadoop   Source File: TestDistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveGroupOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.GROUP);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertTrue(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example 25
Source Project: hadoop   Source File: TestDistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveUserOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.USER);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertTrue(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example 26
Source Project: hadoop   Source File: TestDistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveReplicationOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.REPLICATION);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example 27
Source Project: hadoop   Source File: TestDistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testPreserveTimestampOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.TIMES);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertTrue(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertTrue(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example 28
Source Project: big-c   Source File: DynamicInputChunk.java    License: Apache License 2.0 5 votes vote down vote up
private void openForWrite() throws IOException {
  writer = SequenceFile.createWriter(
          chunkFilePath.getFileSystem(configuration), configuration,
          chunkFilePath, Text.class, CopyListingFileStatus.class,
          SequenceFile.CompressionType.NONE);

}
 
Example 29
Source Project: big-c   Source File: DistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a FileStatus to a CopyListingFileStatus.  If preserving ACLs,
 * populates the CopyListingFileStatus with the ACLs. If preserving XAttrs,
 * populates the CopyListingFileStatus with the XAttrs.
 *
 * @param fileSystem FileSystem containing the file
 * @param fileStatus FileStatus of file
 * @param preserveAcls boolean true if preserving ACLs
 * @param preserveXAttrs boolean true if preserving XAttrs
 * @param preserveRawXAttrs boolean true if preserving raw.* XAttrs
 * @throws IOException if there is an I/O error
 */
public static CopyListingFileStatus toCopyListingFileStatus(
    FileSystem fileSystem, FileStatus fileStatus, boolean preserveAcls, 
    boolean preserveXAttrs, boolean preserveRawXAttrs) throws IOException {
  CopyListingFileStatus copyListingFileStatus =
    new CopyListingFileStatus(fileStatus);
  if (preserveAcls) {
    FsPermission perm = fileStatus.getPermission();
    if (perm.getAclBit()) {
      List<AclEntry> aclEntries = fileSystem.getAclStatus(
        fileStatus.getPath()).getEntries();
      copyListingFileStatus.setAclEntries(aclEntries);
    }
  }
  if (preserveXAttrs || preserveRawXAttrs) {
    Map<String, byte[]> srcXAttrs = fileSystem.getXAttrs(fileStatus.getPath());
    if (preserveXAttrs && preserveRawXAttrs) {
       copyListingFileStatus.setXAttrs(srcXAttrs);
    } else {
      Map<String, byte[]> trgXAttrs = Maps.newHashMap();
      final String rawNS =
          StringUtils.toLowerCase(XAttr.NameSpace.RAW.name());
      for (Map.Entry<String, byte[]> ent : srcXAttrs.entrySet()) {
        final String xattrName = ent.getKey();
        if (xattrName.startsWith(rawNS)) {
          if (preserveRawXAttrs) {
            trgXAttrs.put(xattrName, ent.getValue());
          }
        } else if (preserveXAttrs) {
          trgXAttrs.put(xattrName, ent.getValue());
        }
      }
      copyListingFileStatus.setXAttrs(trgXAttrs);
    }
  }
  return copyListingFileStatus;
}
 
Example 30
Source Project: big-c   Source File: DistCpUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sort sequence file containing FileStatus and Text as key and value respecitvely
 *
 * @param fs - File System
 * @param conf - Configuration
 * @param sourceListing - Source listing file
 * @return Path of the sorted file. Is source file with _sorted appended to the name
 * @throws IOException - Any exception during sort.
 */
public static Path sortListing(FileSystem fs, Configuration conf, Path sourceListing)
    throws IOException {
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, Text.class,
    CopyListingFileStatus.class, conf);
  Path output = new Path(sourceListing.toString() +  "_sorted");

  if (fs.exists(output)) {
    fs.delete(output, false);
  }

  sorter.sort(sourceListing, output);
  return output;
}