Java Code Examples for org.apache.hadoop.tools.DistCpOptions

The following examples show how to use org.apache.hadoop.tools.DistCpOptions. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: circus-train   Source File: CircusTrainCopyListing.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes the copy listing for every source path in {@code options} to {@code pathToListFile}.
 *
 * <p>Each source path is traversed in pre-order; every visited status is written as a
 * (relative path, {@link CopyListingFileStatus}) record, with relative paths computed
 * against the root path read from the configuration.
 *
 * @param pathToListFile destination of the listing
 * @param options DistCp options supplying the source paths
 * @throws IOException if the file system or the listing writer fails
 */
@Override
public void doBuildListing(Path pathToListFile, DistCpOptions options) throws IOException {
  try (Writer listingWriter = newWriter(pathToListFile)) {
    final Path rootPath = getRootPath(getConf());

    for (Path source : options.getSourcePaths()) {
      final FileSystem sourceFs = source.getFileSystem(getConf());
      final FileStatus sourceStatus = sourceFs.getFileStatus(source);

      // Index every status under the source by its path relative to the root.
      final Map<String, CopyListingFileStatus> statusByRelativePath =
          new FileStatusTreeTraverser(sourceFs)
              .preOrderTraversal(sourceStatus)
              .transform(new CopyListingFileStatusFunction(sourceFs, options))
              .uniqueIndex(new RelativePathFunction(rootPath));

      for (Entry<String, CopyListingFileStatus> record : statusByRelativePath.entrySet()) {
        final String relativePath = record.getKey();
        final CopyListingFileStatus status = record.getValue();
        LOG.debug("Adding '{}' with relative path '{}'", status.getPath(), relativePath);
        listingWriter.append(new Text(relativePath), status);
        listingWriter.sync();
      }
    }
  }
}
 
Example 2
Source Project: circus-train   Source File: DistCpOptionsParserTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that {@code distCpOptions} carries the values this test class treats as defaults.
 *
 * @param distCpOptions options to inspect; asserted to be non-null before anything else
 */
private void assertDefaultValues(DistCpOptions distCpOptions) {
  // Guard first: every later assertion dereferences distCpOptions.
  assertThat(distCpOptions, is(not(nullValue())));
  // Attribute preservation: nothing is preserved.
  assertThat(distCpOptions.preserveAttributes().hasNext(), is(false));
  assertThat(distCpOptions.shouldPreserveRawXattrs(), is(false));
  // Commit / copy behaviour flags.
  assertThat(distCpOptions.shouldAppend(), is(false));
  assertThat(distCpOptions.shouldAtomicCommit(), is(false));
  assertThat(distCpOptions.getAtomicWorkPath(), is(nullValue()));
  assertThat(distCpOptions.shouldBlock(), is(true));
  assertThat(distCpOptions.getCopyStrategy(), is(DistCpConstants.UNIFORMSIZE));
  assertThat(distCpOptions.shouldDeleteMissing(), is(false));
  assertThat(distCpOptions.shouldIgnoreFailures(), is(false));
  assertThat(distCpOptions.getLogPath(), is(nullValue()));
  // Resource limits are compared against the DistCpConstants defaults.
  assertThat(distCpOptions.getMapBandwidth(), is(DistCpConstants.DEFAULT_BANDWIDTH_MB));
  assertThat(distCpOptions.getMaxMaps(), is(DistCpConstants.DEFAULT_MAPS));
  assertThat(distCpOptions.shouldOverwrite(), is(false));
  assertThat(distCpOptions.shouldSkipCRC(), is(false));
  assertThat(distCpOptions.getSslConfigurationFile(), is(nullValue()));
  assertThat(distCpOptions.shouldSyncFolder(), is(false));
  assertThat(distCpOptions.getTargetPathExists(), is(true));
}
 
Example 3
Source Project: hadoop   Source File: CopyMapper.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Copies one file through a {@link RetriableFileCopyCommand} and updates the copy counters.
 *
 * @param description text handed to the retriable command
 * @param sourceFileStatus status of the file to copy
 * @param target destination path
 * @param context mapper context used for status and counters
 * @param action file action passed to the copy command
 * @param fileAttributes attributes forwarded to the copy command
 * @throws IOException wrapping any exception raised by the copy command
 */
private void copyFileWithRetry(String description,
    FileStatus sourceFileStatus, Path target, Context context,
    FileAction action, EnumSet<DistCpOptions.FileAttribute> fileAttributes)
    throws IOException {
  final long copiedByteCount;
  try {
    final RetriableFileCopyCommand copyCommand =
        new RetriableFileCopyCommand(skipCrc, description, action);
    copiedByteCount = (Long) copyCommand.execute(
        sourceFileStatus, target, context, fileAttributes);
  } catch (Exception copyFailure) {
    // Surface the failure on the task status before rethrowing with the cause attached.
    final Path failedSource = sourceFileStatus.getPath();
    context.setStatus("Copy Failure: " + failedSource);
    throw new IOException("File copy failed: " + failedSource +
        " --> " + target, copyFailure);
  }

  incrementCounter(context, Counter.BYTESEXPECTED, sourceFileStatus.getLen());
  incrementCounter(context, Counter.BYTESCOPIED, copiedByteCount);
  incrementCounter(context, Counter.COPY, 1);
}
 
Example 4
Source Project: big-c   Source File: CopyMapper.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs a {@link RetriableFileCopyCommand} for a single file and records the
 * expected bytes, copied bytes and copy count on the mapper counters.
 *
 * @param description text handed to the retriable command
 * @param sourceFileStatus status of the file to copy
 * @param target destination path
 * @param context mapper context used for status and counters
 * @param action file action passed to the copy command
 * @param fileAttributes attributes forwarded to the copy command
 * @throws IOException wrapping any exception raised by the copy command
 */
private void copyFileWithRetry(String description,
    FileStatus sourceFileStatus, Path target, Context context,
    FileAction action, EnumSet<DistCpOptions.FileAttribute> fileAttributes)
    throws IOException {
  final long copiedByteCount;
  try {
    final RetriableFileCopyCommand copyCommand =
        new RetriableFileCopyCommand(skipCrc, description, action);
    copiedByteCount = (Long) copyCommand.execute(
        sourceFileStatus, target, context, fileAttributes);
  } catch (Exception copyFailure) {
    // Report the failed source on the task status, then rethrow keeping the cause.
    final Path failedSource = sourceFileStatus.getPath();
    context.setStatus("Copy Failure: " + failedSource);
    throw new IOException("File copy failed: " + failedSource +
        " --> " + target, copyFailure);
  }

  incrementCounter(context, Counter.BYTESEXPECTED, sourceFileStatus.getLen());
  incrementCounter(context, Counter.BYTESCOPIED, copiedByteCount);
  incrementCounter(context, Counter.COPY, 1);
}
 
Example 5
Source Project: terrapin   Source File: BaseUploaderTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test hook: checks the configuration and options the uploader prepared, then returns
 * the {@code distCp} instance held by the test.
 *
 * @param conf configuration prepared for the DistCp job
 * @param options DistCp options prepared for the job
 * @return the test's {@code distCp} field
 */
@Override
protected DistCp getDistCp(Configuration conf, DistCpOptions options) {
  final int maxAttempts = Integer.parseInt(conf.get("mapred.map.max.attempts"));
  assertEquals(Constants.MAPRED_MAP_MAX_ATTEMPTS, maxAttempts);

  final int bytesPerChecksum = Integer.parseInt(conf.get("io.bytes.per.checksum"));
  assertEquals(Constants.CHECKSUM_BYTES, bytesPerChecksum);

  // The expected block size is blockSize rounded up to a multiple of CHECKSUM_BYTES.
  long expectedBlockSize = blockSize;
  if (expectedBlockSize % Constants.CHECKSUM_BYTES != 0) {
    expectedBlockSize = (blockSize / Constants.CHECKSUM_BYTES + 1) * Constants.CHECKSUM_BYTES;
  }
  assertEquals(expectedBlockSize, Long.parseLong(conf.get("dfs.block.size")));
  assertEquals(REPLICA_FACTOR, Integer.parseInt(conf.get("dfs.replication")));

  assertEquals(sourceFiles, options.getSourcePaths());
  assertTrue(options.shouldSkipCRC());
  assertTrue(options.shouldSyncFolder());

  final String targetUri = options.getTargetPath().toString();
  assertTrue(targetUri.startsWith("hdfs://" + NAME_NODE + HDFS_DIR));
  if (numPartition == 1) {
    assertTrue(targetUri.endsWith(TerrapinUtil.formatPartitionName(0)));
  }
  return distCp;
}
 
Example 6
Source Project: hbase   Source File: MapReduceBackupCopyJob.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the source paths from the options object stored in {@code fieldInputOptions}.
 *
 * <p>If the field holds a {@link DistCpOptions} the paths are read directly; otherwise
 * {@code getSourcePaths} is invoked reflectively via
 * {@code org.apache.hadoop.tools.DistCpContext}.
 *
 * @param fieldInputOptions field of this object holding the DistCp input options
 * @return the configured source paths
 * @throws IOException wrapping any reflection failure
 */
@SuppressWarnings("unchecked")
private List<Path> getSourcePaths(Field fieldInputOptions) throws IOException{
  try {
    final Object inputOptions = fieldInputOptions.get(this);
    if (inputOptions instanceof DistCpOptions) {
      return ((DistCpOptions) inputOptions).getSourcePaths();
    }

    // Hadoop 3
    final Method sourcePathsGetter = Class
        .forName("org.apache.hadoop.tools.DistCpContext")
        .getDeclaredMethod("getSourcePaths");
    sourcePathsGetter.setAccessible(true);
    return (List<Path>) sourcePathsGetter.invoke(inputOptions);
  } catch (IllegalArgumentException | IllegalAccessException |
            ClassNotFoundException | NoSuchMethodException |
            SecurityException | InvocationTargetException e) {
    throw new IOException(e);
  }
}
 
Example 7
Source Project: circus-train   Source File: DistCpCopier.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@link DistCpOptions} for this copy.
 *
 * <p>With no explicit source data locations the whole base location is copied;
 * otherwise only the listed sub-paths are.
 *
 * @param copierOptions raw copier options handed to the parser
 * @return the parsed DistCp options
 */
private DistCpOptions parseCopierOptions(Map<String, Object> copierOptions) {
  if (sourceDataLocations.isEmpty()) {
    LOG.debug("Will copy all sub-paths.");
    return new DistCpOptionsParser(singletonList(sourceDataBaseLocation), replicaDataLocation)
        .parse(copierOptions);
  }

  LOG.debug("Will copy {} sub-paths.", sourceDataLocations.size());
  return new DistCpOptionsParser(sourceDataLocations, replicaDataLocation).parse(copierOptions);
}
 
Example 8
Source Project: circus-train   Source File: DistCpCopier.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs DistCp from the source base location to the replica location and waits for the job.
 *
 * <p>The job is submitted non-blocking so that running-job metrics can be registered
 * before this method waits for completion itself.
 *
 * @return metrics built from the finished job
 * @throws CircusTrainException if anything fails; the replica location is cleaned up first
 */
@Override
public Metrics copy() throws CircusTrainException {
  LOG.info("Copying table data.");
  LOG.debug("Invoking DistCp: {} -> {}", sourceDataBaseLocation, replicaDataLocation);

  final DistCpOptions options = parseCopierOptions(copierOptions);
  LOG.debug("Invoking DistCp with options: {}", options);

  CircusTrainCopyListing.setAsCopyListingClass(conf);
  CircusTrainCopyListing.setRootPath(conf, sourceDataBaseLocation);

  try {
    options.setBlocking(false);
    final Job distCpJob = executor.exec(conf, options);

    // Counter name is derived from the upper-cased scheme of the replica location.
    final String scheme = replicaDataLocation.toUri().getScheme().toUpperCase(Locale.ROOT);
    final String bytesWrittenCounter = String.format("%s_BYTES_WRITTEN", scheme);
    registerRunningJobMetrics(distCpJob, bytesWrittenCounter);

    final boolean succeeded = distCpJob.waitForCompletion(true);
    if (!succeeded) {
      throw new IOException(
          "DistCp failure: Job " + distCpJob.getJobID() + " has failed: " + distCpJob.getStatus().getFailureInfo());
    }
    return new JobMetrics(distCpJob, FileSystemCounter.class.getName(), bytesWrittenCounter);
  } catch (Exception e) {
    cleanUpReplicaDataLocation();
    throw new CircusTrainException("Unable to copy file(s)", e);
  }
}
 
Example 9
Source Project: circus-train   Source File: CircusTrainCopyListingTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a listing for {@code input/sub1/sub2} (containing one file, {@code data}) with the
 * root path set to {@code input}, and checks that exactly two records are written:
 * the directory and the file, keyed by their root-relative paths.
 */
@Test
public void typical() throws IOException {
  File rootDir = temp.newFolder("input");
  File nestedDir = new File(rootDir, "sub1/sub2");
  nestedDir.mkdirs();
  Files.asCharSink(new File(nestedDir, "data"), UTF_8).write("test1");

  Path pathToListFile = new Path(temp.newFile("listFile").toURI());

  List<Path> sources = new ArrayList<>();
  sources.add(new Path(nestedDir.toURI()));
  DistCpOptions options = new DistCpOptions(sources, new Path("dummy"));

  CircusTrainCopyListing.setRootPath(conf, new Path(rootDir.toURI()));
  new CircusTrainCopyListing(conf, null).doBuildListing(pathToListFile, options);

  try (Reader listingReader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(pathToListFile))) {
    Text relativePath = new Text();
    CopyListingFileStatus status = new CopyListingFileStatus();

    // First record: the source directory itself.
    assertTrue(listingReader.next(relativePath, status));
    assertThat(relativePath.toString(), is("/sub1/sub2"));
    assertThat(status.getPath().toUri().toString(), endsWith("/input/sub1/sub2"));

    // Second record: the single file inside it.
    assertTrue(listingReader.next(relativePath, status));
    assertThat(relativePath.toString(), is("/sub1/sub2/data"));
    assertThat(status.getPath().toUri().toString(), endsWith("/input/sub1/sub2/data"));

    // Nothing else was listed.
    assertFalse(listingReader.next(relativePath, status));
  }
}
 
Example 10
Source Project: circus-train   Source File: DistCpOptionsParserTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses a map that sets every copier option this test knows about and checks the
 * resulting {@link DistCpOptions} reflect those values, while options not present in
 * the map keep the values asserted below.
 */
@Test
public void typical() {
  Map<String, Object> options = defaultOptions();
  // Request preservation of every attribute named here; the loop below expects this
  // to cover all FileAttribute values.
  options.put(FILE_ATTRIBUTES, Arrays.asList("replication", "blocksize", "user", "group", "permission",
      "checksumtype", "acl", "xattr", "times"));
  options.put(PRESERVE_RAW_XATTRS, "true");
  options.put(ATOMIC_COMMIT, "false");
  options.put(ATOMIC_WORK_PATH, "atomic-work-path");
  options.put(COPY_STRATEGY, "copy-strategy");
  options.put(IGNORE_FAILURES, "true");
  options.put(LOG_PATH, "log-path");
  options.put(TASK_BANDWIDTH, "500");
  options.put(MAX_MAPS, "2");
  options.put(SKIP_CRC, "false");
  options.put(SSL_CONFIGURATION_FILE, "ssl-configuration-file");
  DistCpOptions distCpOptions = parser.parse(options);
  // Every attribute of the enum must be flagged for preservation.
  for (FileAttribute attribute : FileAttribute.values()) {
    assertThat(distCpOptions.shouldPreserve(attribute), is(true));
  }
  assertThat(distCpOptions.shouldPreserveRawXattrs(), is(true));
  assertThat(distCpOptions.shouldAppend(), is(false));
  assertThat(distCpOptions.shouldAtomicCommit(), is(false));
  // String values supplied for path options are expected back as Hadoop Path objects.
  assertThat(distCpOptions.getAtomicWorkPath(), is(new Path("atomic-work-path")));
  assertThat(distCpOptions.shouldBlock(), is(true));
  assertThat(distCpOptions.getCopyStrategy(), is("copy-strategy"));
  assertThat(distCpOptions.shouldDeleteMissing(), is(false));
  assertThat(distCpOptions.shouldIgnoreFailures(), is(true));
  assertThat(distCpOptions.getLogPath(), is(new Path("log-path")));
  // Numeric options were supplied as strings and are expected back as ints.
  assertThat(distCpOptions.getMapBandwidth(), is(500));
  assertThat(distCpOptions.getMaxMaps(), is(2));
  assertThat(distCpOptions.shouldOverwrite(), is(false));
  assertThat(distCpOptions.shouldSkipCRC(), is(false));
  assertThat(distCpOptions.getSslConfigurationFile(), is("ssl-configuration-file"));
  assertThat(distCpOptions.shouldSyncFolder(), is(false));
  assertThat(distCpOptions.getTargetPathExists(), is(true));
}
 
Example 11
Source Project: hadoop   Source File: TestCopyMapper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Maps source files created with a different block size while preserving no attributes,
 * and expects the copy to fail with a root-cause message mentioning both
 * {@code pb} and {@code skipCrc}.
 */
@Test(timeout=40000)
public void testCopyFailOnBlockSizeDifference() {
  try {
    deleteState();
    createSourceDataWithDifferentBlockSize();

    FileSystem fileSystem = cluster.getFileSystem();
    CopyMapper mapper = new CopyMapper();
    StubContext stub = new StubContext(getConfiguration(), null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context mapperContext
        = stub.getContext();

    // Preserve nothing, so block size is not carried over to the target.
    EnumSet<DistCpOptions.FileAttribute> noAttributes
        = EnumSet.noneOf(DistCpOptions.FileAttribute.class);
    mapperContext.getConfiguration().set(
        DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
        DistCpUtils.packAttributes(noAttributes));

    mapper.setup(mapperContext);

    for (Path source : pathList) {
      final FileStatus sourceStatus = fileSystem.getFileStatus(source);
      final Text relativePath =
          new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), source));
      mapper.map(relativePath, new CopyListingFileStatus(sourceStatus), mapperContext);
    }

    Assert.fail("Copy should have failed because of block-size difference.");
  }
  catch (Exception exception) {
    // Check that the exception suggests the use of -pb/-skipCrc.
    final String rootMessage = exception.getCause().getCause().getMessage();
    Assert.assertTrue("Failure exception should have suggested the use of -pb.", rootMessage.contains("pb"));
    Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.", rootMessage.contains("skipCrc"));
  }
}
 
Example 12
Source Project: hadoop   Source File: TestUniformSizeInputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates DistCp options copying {@code /tmp/source} to {@code /tmp/target} on the
 * test cluster's file system.
 *
 * @param nMaps value passed to {@code setMaxMaps}
 * @return the configured options
 */
private static DistCpOptions getOptions(int nMaps) throws Exception {
  final List<Path> sources = new ArrayList<Path>();
  sources.add(new Path(cluster.getFileSystem().getUri().toString()
                       + "/tmp/source"));
  final Path target = new Path(cluster.getFileSystem().getUri().toString()
                               + "/tmp/target");

  final DistCpOptions options = new DistCpOptions(sources, target);
  options.setMaxMaps(nMaps);
  return options;
}
 
Example 13
Source Project: hadoop   Source File: TestDynamicInputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates DistCp options copying {@code /tmp/source} to {@code /tmp/target} on the
 * test cluster's file system, with max maps set to {@code NUM_SPLITS}.
 *
 * @return the configured options
 */
private static DistCpOptions getOptions() throws Exception {
  final List<Path> sources = new ArrayList<Path>();
  sources.add(new Path(cluster.getFileSystem().getUri().toString()
          + "/tmp/source"));
  final Path target = new Path(cluster.getFileSystem().getUri().toString()
          + "/tmp/target");

  final DistCpOptions distCpOptions = new DistCpOptions(sources, target);
  distCpOptions.setMaxMaps(NUM_SPLITS);
  return distCpOptions;
}
 
Example 14
Source Project: big-c   Source File: TestCopyMapper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Maps source files created with a different block size while preserving no attributes,
 * and expects the copy to fail with a root-cause message mentioning both
 * {@code pb} and {@code skipCrc}.
 */
@Test(timeout=40000)
public void testCopyFailOnBlockSizeDifference() {
  try {
    deleteState();
    createSourceDataWithDifferentBlockSize();

    FileSystem fileSystem = cluster.getFileSystem();
    CopyMapper mapper = new CopyMapper();
    StubContext stub = new StubContext(getConfiguration(), null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context mapperContext
        = stub.getContext();

    // Preserve nothing, so block size is not carried over to the target.
    EnumSet<DistCpOptions.FileAttribute> noAttributes
        = EnumSet.noneOf(DistCpOptions.FileAttribute.class);
    mapperContext.getConfiguration().set(
        DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
        DistCpUtils.packAttributes(noAttributes));

    mapper.setup(mapperContext);

    for (Path source : pathList) {
      final FileStatus sourceStatus = fileSystem.getFileStatus(source);
      final Text relativePath =
          new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), source));
      mapper.map(relativePath, new CopyListingFileStatus(sourceStatus), mapperContext);
    }

    Assert.fail("Copy should have failed because of block-size difference.");
  }
  catch (Exception exception) {
    // Check that the exception suggests the use of -pb/-skipCrc.
    final String rootMessage = exception.getCause().getCause().getMessage();
    Assert.assertTrue("Failure exception should have suggested the use of -pb.", rootMessage.contains("pb"));
    Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.", rootMessage.contains("skipCrc"));
  }
}
 
Example 15
Source Project: big-c   Source File: TestUniformSizeInputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates DistCp options copying {@code /tmp/source} to {@code /tmp/target} on the
 * test cluster's file system.
 *
 * @param nMaps value passed to {@code setMaxMaps}
 * @return the configured options
 */
private static DistCpOptions getOptions(int nMaps) throws Exception {
  final List<Path> sources = new ArrayList<Path>();
  sources.add(new Path(cluster.getFileSystem().getUri().toString()
                       + "/tmp/source"));
  final Path target = new Path(cluster.getFileSystem().getUri().toString()
                               + "/tmp/target");

  final DistCpOptions options = new DistCpOptions(sources, target);
  options.setMaxMaps(nMaps);
  return options;
}
 
Example 16
Source Project: big-c   Source File: TestDynamicInputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates DistCp options copying {@code /tmp/source} to {@code /tmp/target} on the
 * test cluster's file system, with max maps set to {@code NUM_SPLITS}.
 *
 * @return the configured options
 */
private static DistCpOptions getOptions() throws Exception {
  final List<Path> sources = new ArrayList<Path>();
  sources.add(new Path(cluster.getFileSystem().getUri().toString()
          + "/tmp/source"));
  final Path target = new Path(cluster.getFileSystem().getUri().toString()
          + "/tmp/target");

  final DistCpOptions distCpOptions = new DistCpOptions(sources, target);
  distCpOptions.setMaxMaps(NUM_SPLITS);
  return distCpOptions;
}
 
Example 17
Source Project: kylin   Source File: CubeMigrationCrossClusterCLI.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Copies {@code srcDir} to {@code dstDir} with a blocking DistCp run that preserves block size.
 *
 * @param srcDir source directory
 * @param srcConf not read by this method
 * @param dstDir destination directory
 * @param dstConf not read by this method
 * @throws Exception if option parsing or the DistCp execution fails
 */
protected void copyHDFSPath(String srcDir, Configuration srcConf, String dstDir, Configuration dstConf)
        throws Exception {
    logger.info("start to copy hdfs directory from {} to {}", srcDir, dstDir);

    final String[] distCpArgs = { srcDir, dstDir };
    final DistCpOptions options = OptionsParser.parse(distCpArgs);
    options.preserve(DistCpOptions.FileAttribute.BLOCKSIZE);
    options.setBlocking(true);
    setTargetPathExists(options);

    new DistCp(getConfOfDistCp(), options).execute();
    logger.info("copied hdfs directory from {} to {}", srcDir, dstDir);
}
 
Example 18
Source Project: kylin   Source File: CubeMigrationCrossClusterCLI.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether the target path exists on the destination cluster and records the
 * answer in both {@code inputOptions} and the destination job configuration,
 * for the benefit of CopyCommitter.
 *
 * @param inputOptions DistCp options whose target path is checked and updated
 * @throws IOException if the target file system cannot be resolved or queried
 */
public void setTargetPathExists(DistCpOptions inputOptions) throws IOException {
    final Path targetPath = inputOptions.getTargetPath();
    final boolean exists = targetPath.getFileSystem(dstCluster.jobConf).exists(targetPath);
    inputOptions.setTargetPathExists(exists);
    dstCluster.jobConf.setBoolean(DistCpConstants.CONF_LABEL_TARGET_PATH_EXISTS, exists);
}
 
Example 19
Source Project: circus-train   Source File: DistCpOptionsParser.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a parser whose options start from the given source and target locations.
 *
 * @param sourceDataLocations paths to copy from
 * @param replicaDataLocation path to copy to
 */
DistCpOptionsParser(List<Path> sourceDataLocations, Path replicaDataLocation) {
  this.distCpOptions = new DistCpOptions(sourceDataLocations, replicaDataLocation);
}
 
Example 20
Source Project: circus-train   Source File: DistCpOptionsParser.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Applies the entries of {@code copierOptions} to the {@code distCpOptions} held by this
 * parser and returns that same object.
 *
 * <p>Every lookup passes the option's current value as the fallback, so keys missing from
 * the map are presumably left unchanged (depends on the MapUtils/MoreMapUtils default
 * handling, which is not visible here).
 *
 * @param copierOptions raw copier options; {@code null} means nothing to parse
 * @return the parser's {@code distCpOptions}, updated in place
 * @throws IllegalArgumentException if the resolved task bandwidth or max maps is not positive
 */
protected DistCpOptions parse(Map<String, Object> copierOptions) {
  if (copierOptions == null) {
    LOG.debug("Null copier options: nothing to parse");
    return distCpOptions;
  }

  // Attribute preservation is additive: each listed attribute is switched on.
  List<FileAttribute> fileAttributes = MoreMapUtils.getListOfEnum(copierOptions, FILE_ATTRIBUTES,
      Collections.<FileAttribute>emptyList(), FileAttribute.class);
  for (FileAttribute fileAttribute : fileAttributes) {
    distCpOptions.preserve(fileAttribute);
  }
  // Raw xattr preservation can only be switched on here, never off.
  if (MapUtils.getBoolean(copierOptions, PRESERVE_RAW_XATTRS, distCpOptions.shouldPreserveRawXattrs())) {
    distCpOptions.preserveRawXattrs();
  }
  distCpOptions.setAtomicWorkPath(
      MoreMapUtils.getHadoopPath(copierOptions, ATOMIC_WORK_PATH, distCpOptions.getAtomicWorkPath()));
  distCpOptions.setCopyStrategy(MapUtils.getString(copierOptions, COPY_STRATEGY, distCpOptions.getCopyStrategy()));
  distCpOptions
      .setIgnoreFailures(MapUtils.getBoolean(copierOptions, IGNORE_FAILURES, distCpOptions.shouldIgnoreFailures()));
  distCpOptions.setLogPath(MoreMapUtils.getHadoopPath(copierOptions, LOG_PATH, distCpOptions.getLogPath()));

  // Reject non-positive bandwidth before handing it to DistCpOptions.
  int taskBandwidth = MapUtils.getIntValue(copierOptions, TASK_BANDWIDTH, distCpOptions.getMapBandwidth());
  if (taskBandwidth <= 0) {
    throw new IllegalArgumentException("Parameter " + TASK_BANDWIDTH + " must be a positive integer.");
  }
  distCpOptions.setMapBandwidth(taskBandwidth);

  // Same check for the maximum number of maps.
  int maxMaps = MapUtils.getIntValue(copierOptions, MAX_MAPS, distCpOptions.getMaxMaps());
  if (maxMaps <= 0) {
    throw new IllegalArgumentException("Parameter " + MAX_MAPS + " must be a positive integer.");
  }
  distCpOptions.setMaxMaps(maxMaps);

  distCpOptions.setSslConfigurationFile(
      MapUtils.getString(copierOptions, SSL_CONFIGURATION_FILE, distCpOptions.getSslConfigurationFile()));
  // These validate: order is important
  // NOTE(review): the two setters below are kept last because they validate against
  // options set earlier; do not reorder without checking DistCpOptions.
  distCpOptions
      .setAtomicCommit(MapUtils.getBoolean(copierOptions, ATOMIC_COMMIT, distCpOptions.shouldAtomicCommit()));
  distCpOptions.setSkipCRC(MapUtils.getBoolean(copierOptions, SKIP_CRC, distCpOptions.shouldSkipCRC()));
  return distCpOptions;
}
 
Example 21
Source Project: circus-train   Source File: DistCpCopier.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link DistCp} for the given configuration and options and executes it.
 *
 * @param conf configuration for the DistCp run
 * @param options DistCp options
 * @return the job returned by {@code DistCp.execute()}
 * @throws Exception if DistCp cannot be created or executed
 */
@Override
public Job exec(Configuration conf, DistCpOptions options) throws Exception {
  final DistCp distCp = new DistCp(conf, options);
  return distCp.execute();
}
 
Example 22
/**
 * Captures the file system and the ACL / xattr / raw-xattr preservation flags
 * read from {@code options}.
 *
 * @param fileSystem file system stored on this function
 * @param options DistCp options the preservation flags are read from
 */
CopyListingFileStatusFunction(FileSystem fileSystem, DistCpOptions options) {
  this.fileSystem = fileSystem;
  this.preserveAcls = options.shouldPreserve(FileAttribute.ACL);
  this.preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR);
  this.preserveRawXAttrs = options.shouldPreserveRawXattrs();
}
 
Example 23
Source Project: hadoop   Source File: CopyMapper.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Implementation of the Mapper::map(). Does the copy.
 *
 * <p>The target is the qualified target work path with {@code relPath} appended.
 * Directories are created on the target and the method returns; files are either
 * skipped or copied depending on {@code checkUpdate}, and in both cases the selected
 * attributes are then applied to the target. Any IOException raised along the way is
 * passed to {@code handleFailures}.
 *
 * @param relPath Path of the entry relative to the copy root; appended to the target work path.
 * @param sourceFileStatus Listing entry for the source; its status is re-read from the
 *        source file system before copying.
 * @param context Mapper context used for configuration, status, counters and output.
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public void map(Text relPath, CopyListingFileStatus sourceFileStatus,
        Context context) throws IOException, InterruptedException {
  Path sourcePath = sourceFileStatus.getPath();

  if (LOG.isDebugEnabled())
    LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath);

  // Target = qualified work path + relative path (plain string concatenation).
  Path target = new Path(targetWorkPath.makeQualified(targetFS.getUri(),
                        targetFS.getWorkingDirectory()) + relPath.toString());

  EnumSet<DistCpOptions.FileAttribute> fileAttributes
          = getFileAttributeSettings(context);
  final boolean preserveRawXattrs = context.getConfiguration().getBoolean(
      DistCpConstants.CONF_LABEL_PRESERVE_RAWXATTRS, false);

  final String description = "Copying " + sourcePath + " to " + target;
  context.setStatus(description);

  LOG.info(description);

  try {
    CopyListingFileStatus sourceCurrStatus;
    FileSystem sourceFS;
    try {
      // Re-read the source status now rather than trusting the listing entry.
      sourceFS = sourcePath.getFileSystem(conf);
      final boolean preserveXAttrs =
          fileAttributes.contains(FileAttribute.XATTR);
      sourceCurrStatus = DistCpUtils.toCopyListingFileStatus(sourceFS,
        sourceFS.getFileStatus(sourcePath),
        fileAttributes.contains(FileAttribute.ACL), 
        preserveXAttrs, preserveRawXattrs);
    } catch (FileNotFoundException e) {
      // A source that vanished since listing is reported as a CopyReadException.
      throw new IOException(new RetriableFileCopyCommand.CopyReadException(e));
    }

    FileStatus targetStatus = null;

    try {
      targetStatus = targetFS.getFileStatus(target);
    } catch (FileNotFoundException ignore) {
      // A missing target is expected; targetStatus stays null.
      if (LOG.isDebugEnabled())
        LOG.debug("Path could not be found: " + target, ignore);
    }

    // Refuse to replace a file with a directory or vice versa.
    if (targetStatus != null && (targetStatus.isDirectory() != sourceCurrStatus.isDirectory())) {
      throw new IOException("Can't replace " + target + ". Target is " +
          getFileType(targetStatus) + ", Source is " + getFileType(sourceCurrStatus));
    }

    // Directories are only created; no attribute preservation happens on this path.
    if (sourceCurrStatus.isDirectory()) {
      createTargetDirsWithRetry(description, target, context);
      return;
    }

    FileAction action = checkUpdate(sourceFS, sourceCurrStatus, target);
    if (action == FileAction.SKIP) {
      LOG.info("Skipping copy of " + sourceCurrStatus.getPath()
               + " to " + target);
      updateSkipCounters(context, sourceCurrStatus);
      context.write(null, new Text("SKIP: " + sourceCurrStatus.getPath()));
    } else {
      copyFileWithRetry(description, sourceCurrStatus, target, context,
          action, fileAttributes);
    }

    // Attributes are applied to the target whether the file was copied or skipped.
    DistCpUtils.preserve(target.getFileSystem(conf), target, sourceCurrStatus,
        fileAttributes, preserveRawXattrs);
  } catch (IOException exception) {
    handleFailures(exception, sourceFileStatus, target, context);
  }
}
 
Example 24
Source Project: hadoop   Source File: CopyMapper.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads the packed preserve-status string from the job configuration and unpacks it.
 *
 * @param context mapper context whose configuration is read
 * @return the file attributes unpacked from the configuration value
 */
private static EnumSet<DistCpOptions.FileAttribute>
        getFileAttributeSettings(Mapper.Context context) {
  final String preserveStatusLabel = DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel();
  return DistCpUtils.unpackAttributes(
          context.getConfiguration().get(preserveStatusLabel));
}
 
Example 25
Source Project: hadoop   Source File: TestCopyMapper.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Runs the copy mapper over every path in {@code pathList} and verifies the result,
 * the COPY and BYTESCOPIED counters, and that a second pass over existing files only
 * produces {@code SKIP:} records.
 *
 * @param preserveChecksum when true, source data is created with a different checksum
 *        type and CHECKSUMTYPE is preserved in addition to REPLICATION
 */
private void testCopy(boolean preserveChecksum) throws Exception {
  deleteState();
  if (preserveChecksum) {
    createSourceDataWithDifferentChecksumType();
  } else {
    createSourceData();
  }

  final FileSystem fileSystem = cluster.getFileSystem();
  final CopyMapper mapper = new CopyMapper();
  final StubContext stub = new StubContext(getConfiguration(), null, 0);
  final Mapper<Text, CopyListingFileStatus, Text, Text>.Context mapperContext
          = stub.getContext();

  final EnumSet<DistCpOptions.FileAttribute> preserved = preserveChecksum
          ? EnumSet.of(DistCpOptions.FileAttribute.REPLICATION,
                       DistCpOptions.FileAttribute.CHECKSUMTYPE)
          : EnumSet.of(DistCpOptions.FileAttribute.REPLICATION);
  mapperContext.getConfiguration().set(
          DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
          DistCpUtils.packAttributes(preserved));

  mapper.setup(mapperContext);

  for (Path source : pathList) {
    final Text relativePath =
        new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), source));
    mapper.map(relativePath,
        new CopyListingFileStatus(fileSystem.getFileStatus(source)), mapperContext);
  }

  // Check that the maps worked.
  verifyCopy(fileSystem, preserveChecksum);
  Assert.assertEquals(pathList.size(), stub.getReporter()
      .getCounter(CopyMapper.Counter.COPY).getValue());
  if (preserveChecksum) {
    Assert.assertEquals(nFiles * NON_DEFAULT_BLOCK_SIZE * 2, stub
        .getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED)
        .getValue());
  } else {
    Assert.assertEquals(nFiles * DEFAULT_FILE_SIZE, stub
        .getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED)
        .getValue());
  }

  // Second pass: everything already exists, so every written record must be a skip.
  testCopyingExistingFiles(fileSystem, mapper, mapperContext);
  for (Text written : stub.getWriter().values()) {
    final String record = written.toString();
    Assert.assertTrue(record + " is not skipped", record.startsWith("SKIP:"));
  }
}
 
Example 26
Source Project: hadoop   Source File: TestUniformSizeInputFormat.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds a copy listing for the test options, asks {@code UniformSizeInputFormat} for
 * its splits, and checks that the bytes summed over all splits equal
 * {@code totalFileSize}.
 *
 * @param nMaps number of maps requested through the options and used to derive the
 *        expected size per map
 * @throws Exception if listing, split computation or record reading fails
 */
public void testGetSplits(int nMaps) throws Exception {
  DistCpOptions options = getOptions(nMaps);
  Configuration configuration = new Configuration();
  configuration.set("mapred.map.tasks",
                    String.valueOf(options.getMaxMaps()));
  Path listFile = new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/testGetSplits_1/fileList.seq");
  CopyListing.getCopyListing(configuration, CREDENTIALS, options).
      buildListing(listFile, options);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());
  UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
  List<InputSplit> splits
          = uniformSizeInputFormat.getSplits(jobContext);

  // Integer division: any remainder of totalFileSize / nMaps is dropped.
  int sizePerMap = totalFileSize/nMaps;

  checkSplits(listFile, splits);

  int doubleCheckedTotalSize = 0;
  int previousSplitSize = -1;
  for (int i=0; i<splits.size(); ++i) {
    InputSplit split = splits.get(i);
    int currentSplitSize = 0;
    RecordReader<Text, CopyListingFileStatus> recordReader =
      uniformSizeInputFormat.createRecordReader(split, null);
    StubContext stubContext = new StubContext(jobContext.getConfiguration(),
                                              recordReader, 0);
    final TaskAttemptContext taskAttemptContext
       = stubContext.getContext();
    recordReader.initialize(split, taskAttemptContext);
    while (recordReader.nextKeyValue()) {
      Path sourcePath = recordReader.getCurrentValue().getPath();
      FileSystem fs = sourcePath.getFileSystem(configuration);
      FileStatus fileStatus [] = fs.listStatus(sourcePath);
      // Entries listing more than one status are not counted
      // (presumably directories with several children; confirm).
      if (fileStatus.length > 1) {
        continue;
      }
      currentSplitSize += fileStatus[0].getLen();
    }
    // NOTE(review): previousSplitSize is initialised to -1 and never reassigned in this
    // method, so the first disjunct is always true and this assertion cannot fail.
    // Assigning previousSplitSize = currentSplitSize after the check would make the
    // 10% uniformity check live; confirm the tolerance holds before enabling it.
    Assert.assertTrue(
         previousSplitSize == -1
             || Math.abs(currentSplitSize - previousSplitSize) < 0.1*sizePerMap
             || i == splits.size()-1);

    doubleCheckedTotalSize += currentSplitSize;
  }

  Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}
 
Example 27
Source Project: hadoop   Source File: TestCopyCommitter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Commits a job with PERMISSION preservation enabled and checks that the target
 * directories end up with the source permission, both after the first commit and
 * after committing a second time.
 */
@Test
public void testPreserveStatus() {
  final TaskAttemptContext attemptContext = getTaskAttemptContext(config);
  final JobContext jobContext = new JobContextImpl(attemptContext.getConfiguration(),
      attemptContext.getTaskAttemptID().getJobID());
  final Configuration conf = jobContext.getConfiguration();

  FileSystem fs = null;
  try {
    final OutputCommitter committer = new CopyCommitter(null, attemptContext);
    fs = FileSystem.get(conf);

    // 0777 on the source tree, 0700 on the target tree before the commit.
    final FsPermission sourcePerm = new FsPermission((short) 0777);
    final FsPermission initialPerm = new FsPermission((short) 0700);
    final String sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm);
    final String targetBase = TestDistCpUtils.createTestSetup(fs, initialPerm);

    final DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)),
        new Path("/out"));
    options.preserve(FileAttribute.PERMISSION);
    options.appendToConf(conf);
    options.setTargetPathExists(false);

    final Path listingFile = new Path("/tmp1/" + rand.nextLong());
    new GlobbedCopyListing(conf, CREDENTIALS).buildListing(listingFile, options);

    conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);

    // First pass is the real commit; the second pass tests for idempotent commit.
    for (int commitPass = 0; commitPass < 2; commitPass++) {
      committer.commitJob(jobContext);
      if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
        Assert.fail("Permission don't match");
      }
    }
  } catch (IOException e) {
    LOG.error("Exception encountered while testing for preserve status", e);
    Assert.fail("Preserve status failure");
  } finally {
    TestDistCpUtils.delete(fs, "/tmp1");
    conf.unset(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
  }
}
 
Example 28
Source Project: hadoop   Source File: TestCopyCommitter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Commits a job with sync-folder and delete-missing enabled and checks, in
 * both directions, that {@code TestDistCpUtils.checkIfFoldersAreInSync}
 * reports the source and target trees as in sync. The commit is then
 * repeated to confirm that a second commit yields the same result.
 *
 * <p>Only {@link IOException} is caught here. Catching {@code Throwable}
 * would also intercept the {@code AssertionError} raised by the in-sync
 * assertions and replace its specific message with the generic
 * "Delete missing failure".
 */
@Test
public void testDeleteMissing() {
  TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
  JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
      taskAttemptContext.getTaskAttemptID().getJobID());
  Configuration conf = jobContext.getConfiguration();

  FileSystem fs = null;
  try {
    OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
    fs = FileSystem.get(conf);
    String sourceBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
    String targetBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
    // A third test tree is renamed to the target path; presumably this gives
    // the target extra entries that are absent from the source -- confirm
    // against TestDistCpUtils.createTestSetup.
    String targetBaseAdd = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
    fs.rename(new Path(targetBaseAdd), new Path(targetBase));

    DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)),
        new Path("/out"));
    options.setSyncFolder(true);
    options.setDeleteMissing(true);
    options.appendToConf(conf);

    CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
    Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
    listing.buildListing(listingFile, options);

    // Work path and final path both point at the target tree.
    conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
    conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);

    committer.commitJob(jobContext);
    Assert.assertTrue("Source and target folders are not in sync",
        TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase));
    Assert.assertTrue("Source and target folders are not in sync",
        TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase));

    //Test for idempotent commit
    committer.commitJob(jobContext);
    Assert.assertTrue("Source and target folders are not in sync",
        TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase));
    Assert.assertTrue("Source and target folders are not in sync",
        TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase));
  } catch (IOException e) {
    // Narrowed from Throwable so assertion failures above surface unchanged.
    LOG.error("Exception encountered while testing for delete missing", e);
    Assert.fail("Delete missing failure");
  } finally {
    // Remove the scratch tree and switch delete-missing back off in conf.
    TestDistCpUtils.delete(fs, "/tmp1");
    conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
  }
}
 
Example 29
Source Project: hadoop   Source File: TestCopyCommitter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Exercises delete-missing on two flat directories whose file names
 * interleave: the source holds 1, 3, 4, 5, 7, 8, 9 and the target holds
 * 2, 4, 5, 7, 9, A. After the commit the target is expected to be in sync
 * with the source and to contain exactly the four shared names (4, 5, 7, 9).
 * The commit is then repeated to confirm that a second commit yields the
 * same result.
 */
@Test
public void testDeleteMissingFlatInterleavedFiles() {
  TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
  JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
      taskAttemptContext.getTaskAttemptID().getJobID());
  Configuration conf = jobContext.getConfiguration();

  FileSystem fs = null;
  try {
    OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
    fs = FileSystem.get(conf);
    String sourceBase = "/tmp1/" + String.valueOf(rand.nextLong());
    String targetBase = "/tmp1/" + String.valueOf(rand.nextLong());

    // Files are created in the same order as before: source first, then target.
    for (String name : new String[] {"1", "3", "4", "5", "7", "8", "9"}) {
      TestDistCpUtils.createFile(fs, sourceBase + "/" + name);
    }
    for (String name : new String[] {"2", "4", "5", "7", "9", "A"}) {
      TestDistCpUtils.createFile(fs, targetBase + "/" + name);
    }

    DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)),
        new Path("/out"));
    options.setSyncFolder(true);
    options.setDeleteMissing(true);
    options.appendToConf(conf);

    CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
    Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
    listing.buildListing(listingFile, options);

    // Work path and final path both point at the target directory.
    conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
    conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);

    committer.commitJob(jobContext);
    Assert.assertTrue("Source and target folders are not in sync",
        TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase));
    // JUnit's assertEquals takes (expected, actual); the expected count goes first
    // so that a failure message reports the values the right way round.
    Assert.assertEquals(4, fs.listStatus(new Path(targetBase)).length);

    //Test for idempotent commit
    committer.commitJob(jobContext);
    Assert.assertTrue("Source and target folders are not in sync",
        TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase));
    Assert.assertEquals(4, fs.listStatus(new Path(targetBase)).length);
  } catch (IOException e) {
    LOG.error("Exception encountered while testing for delete missing", e);
    Assert.fail("Delete missing failure");
  } finally {
    // Remove the scratch tree and switch delete-missing back off in conf.
    TestDistCpUtils.delete(fs, "/tmp1");
    conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
  }
}
 
Example 30
Source Project: hadoop   Source File: TestDynamicInputFormat.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds a copy listing, asks {@code DynamicInputFormat} for its splits and
 * reads every split through its own record reader. For each record the test
 * checks that the path is one of {@code expectedFilePaths} and that the
 * reported progress stays within [0, 1] and never decreases. After a split
 * is drained its progress must be exactly 1, and the number of records read
 * across all splits must equal the number of expected paths.
 *
 * @throws Exception if building the listing, computing splits or reading
 *         records fails
 */
@Test
public void testGetSplits() throws Exception {
  DistCpOptions options = getOptions();
  Configuration configuration = new Configuration();
  configuration.set("mapred.map.tasks",
                    String.valueOf(options.getMaxMaps()));
  CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
          new Path(cluster.getFileSystem().getUri().toString()
                  +"/tmp/testDynInputFormat/fileList.seq"), options);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());
  DynamicInputFormat<Text, CopyListingFileStatus> inputFormat =
      new DynamicInputFormat<Text, CopyListingFileStatus>();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);

  int nFiles = 0;
  int taskId = 0;

  for (InputSplit split : splits) {
    RecordReader<Text, CopyListingFileStatus> recordReader =
         inputFormat.createRecordReader(split, null);
    StubContext stubContext = new StubContext(jobContext.getConfiguration(),
                                              recordReader, taskId);
    final TaskAttemptContext taskAttemptContext
       = stubContext.getContext();

    // Initialize each reader with the split it was created for; the previous
    // code passed splits.get(0) on every iteration.
    // NOTE(review): whether the reader actually consults the split argument
    // is not visible here -- confirm against DynamicRecordReader.initialize.
    recordReader.initialize(split, taskAttemptContext);
    float previousProgressValue = 0f;
    while (recordReader.nextKeyValue()) {
      CopyListingFileStatus fileStatus = recordReader.getCurrentValue();
      String source = fileStatus.getPath().toString();
      System.out.println(source);
      Assert.assertTrue("Unexpected path in listing: " + source,
          expectedFilePaths.contains(source));
      final float progress = recordReader.getProgress();
      Assert.assertTrue(progress >= previousProgressValue);
      Assert.assertTrue(progress >= 0.0f);
      Assert.assertTrue(progress <= 1.0f);
      previousProgressValue = progress;
      ++nFiles;
    }
    // A zero delta keeps the comparison exact while letting a failure report
    // the actual progress value.
    Assert.assertEquals(1.0f, recordReader.getProgress(), 0.0f);

    ++taskId;
  }

  Assert.assertEquals(expectedFilePaths.size(), nFiles);
}