org.apache.hadoop.tools.DistCpOptions Java Examples

The following examples show how to use org.apache.hadoop.tools.DistCpOptions. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CopyMapper.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Copies one source file to {@code target} through a
 * {@link RetriableFileCopyCommand} and records the outcome in the job counters.
 *
 * @param description label passed to the retriable command
 * @param sourceFileStatus status of the file being copied
 * @param target destination path of the copy
 * @param context mapper context; receives a failure status and the counter updates
 * @param action file action passed to the retriable command
 * @param fileAttributes attributes passed through to the copy command
 * @throws IOException if the command fails for any reason; the original
 *     exception is attached as the cause
 */
private void copyFileWithRetry(String description,
    FileStatus sourceFileStatus, Path target, Context context,
    FileAction action, EnumSet<DistCpOptions.FileAttribute> fileAttributes)
    throws IOException {
  long copiedByteCount;
  try {
    // The command is built inside the try so that construction failures are
    // reported the same way as copy failures.
    final Object commandResult =
        new RetriableFileCopyCommand(skipCrc, description, action)
            .execute(sourceFileStatus, target, context, fileAttributes);
    copiedByteCount = (Long) commandResult;
  } catch (Exception e) {
    final Path failedSource = sourceFileStatus.getPath();
    context.setStatus("Copy Failure: " + failedSource);
    throw new IOException("File copy failed: " + sourceFileStatus.getPath() +
        " --> " + target, e);
  }

  // Counters are only touched once the copy has succeeded.
  incrementCounter(context, Counter.BYTESEXPECTED, sourceFileStatus.getLen());
  incrementCounter(context, Counter.BYTESCOPIED, copiedByteCount);
  incrementCounter(context, Counter.COPY, 1);
}
 
Example #2
Source File: CopyMapper.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a {@link RetriableFileCopyCommand} for a single source file and, on
 * success, bumps the expected-bytes, copied-bytes and copy counters.
 *
 * @param description label passed to the retriable command
 * @param sourceFileStatus status of the file being copied
 * @param target destination path of the copy
 * @param context mapper context; receives a failure status and the counter updates
 * @param action file action passed to the retriable command
 * @param fileAttributes attributes passed through to the copy command
 * @throws IOException wrapping whatever exception the command raised
 */
private void copyFileWithRetry(String description,
    FileStatus sourceFileStatus, Path target, Context context,
    FileAction action, EnumSet<DistCpOptions.FileAttribute> fileAttributes)
    throws IOException {
  long copiedByteCount;
  try {
    // Construction stays inside the try: any failure here is wrapped as well.
    final Object commandResult =
        new RetriableFileCopyCommand(skipCrc, description, action)
            .execute(sourceFileStatus, target, context, fileAttributes);
    copiedByteCount = (Long) commandResult;
  } catch (Exception e) {
    final Path failedSource = sourceFileStatus.getPath();
    context.setStatus("Copy Failure: " + failedSource);
    throw new IOException("File copy failed: " + sourceFileStatus.getPath() +
        " --> " + target, e);
  }

  incrementCounter(context, Counter.BYTESEXPECTED, sourceFileStatus.getLen());
  incrementCounter(context, Counter.BYTESCOPIED, copiedByteCount);
  incrementCounter(context, Counter.COPY, 1);
}
 
Example #3
Source File: CircusTrainCopyListing.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the copy listing for every source path in {@code options} to
 * {@code pathToListFile}.
 *
 * <p>Each source path is traversed pre-order; every visited entry is written
 * as a (relative path, {@link CopyListingFileStatus}) record, where the
 * relative path is computed against the root path read from the configuration.
 *
 * @param pathToListFile file the listing records are written to
 * @param options supplies the source paths and is passed to the status conversion
 * @throws IOException if reading the source file systems or writing the listing fails
 */
@Override
public void doBuildListing(Path pathToListFile, DistCpOptions options) throws IOException {
  try (Writer listingWriter = newWriter(pathToListFile)) {
    final Path rootPath = getRootPath(getConf());

    for (Path source : options.getSourcePaths()) {
      final FileSystem sourceFs = source.getFileSystem(getConf());
      final FileStatus sourceStatus = sourceFs.getFileStatus(source);

      // Keyed by path relative to the root.
      final Map<String, CopyListingFileStatus> statusByRelativePath =
          new FileStatusTreeTraverser(sourceFs)
              .preOrderTraversal(sourceStatus)
              .transform(new CopyListingFileStatusFunction(sourceFs, options))
              .uniqueIndex(new RelativePathFunction(rootPath));

      for (Entry<String, CopyListingFileStatus> record : statusByRelativePath.entrySet()) {
        final String relativePath = record.getKey();
        final CopyListingFileStatus status = record.getValue();
        LOG.debug("Adding '{}' with relative path '{}'", status.getPath(), relativePath);
        listingWriter.append(new Text(relativePath), status);
        // Sync is requested after every single record.
        listingWriter.sync();
      }
    }
  }
}
 
Example #4
Source File: DistCpOptionsParserTest.java    From circus-train with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts the value this test suite expects from each {@link DistCpOptions}
 * accessor listed below when no explicit option was supplied.
 *
 * @param distCpOptions the parsed options to inspect; must not be null
 */
private void assertDefaultValues(DistCpOptions distCpOptions) {
  final DistCpOptions defaults = distCpOptions;
  assertThat(defaults, is(not(nullValue())));

  // Attribute preservation and commit behaviour.
  assertThat(defaults.preserveAttributes().hasNext(), is(false));
  assertThat(defaults.shouldPreserveRawXattrs(), is(false));
  assertThat(defaults.shouldAppend(), is(false));
  assertThat(defaults.shouldAtomicCommit(), is(false));
  assertThat(defaults.getAtomicWorkPath(), is(nullValue()));

  // Job execution settings.
  assertThat(defaults.shouldBlock(), is(true));
  assertThat(defaults.getCopyStrategy(), is(DistCpConstants.UNIFORMSIZE));
  assertThat(defaults.shouldDeleteMissing(), is(false));
  assertThat(defaults.shouldIgnoreFailures(), is(false));
  assertThat(defaults.getLogPath(), is(nullValue()));
  assertThat(defaults.getMapBandwidth(), is(DistCpConstants.DEFAULT_BANDWIDTH_MB));
  assertThat(defaults.getMaxMaps(), is(DistCpConstants.DEFAULT_MAPS));

  // Copy semantics.
  assertThat(defaults.shouldOverwrite(), is(false));
  assertThat(defaults.shouldSkipCRC(), is(false));
  assertThat(defaults.getSslConfigurationFile(), is(nullValue()));
  assertThat(defaults.shouldSyncFolder(), is(false));
  assertThat(defaults.getTargetPathExists(), is(true));
}
 
Example #5
Source File: BaseUploaderTest.java    From terrapin with Apache License 2.0 6 votes vote down vote up
/**
 * Test override that verifies the configuration and options handed to DistCp
 * before returning the pre-built {@code distCp} instance.
 *
 * @param conf configuration expected to carry the retry, checksum, block-size
 *     and replication settings asserted below
 * @param options DistCp options expected to skip CRC, sync folders and point
 *     at the expected HDFS target
 * @return the {@code distCp} field of the enclosing test
 */
@Override
protected DistCp getDistCp(Configuration conf, DistCpOptions options) {
  assertEquals(Constants.MAPRED_MAP_MAX_ATTEMPTS,
      Integer.parseInt(conf.get("mapred.map.max.attempts")));
  assertEquals(Constants.CHECKSUM_BYTES,
      Integer.parseInt(conf.get("io.bytes.per.checksum")));

  // The expected block size is the configured one rounded up to the next
  // multiple of the checksum chunk size.
  final long blockSizeExpected = blockSize % Constants.CHECKSUM_BYTES == 0
      ? blockSize
      : (blockSize / Constants.CHECKSUM_BYTES + 1) * Constants.CHECKSUM_BYTES;
  assertEquals(blockSizeExpected, Long.parseLong(conf.get("dfs.block.size")));
  assertEquals(REPLICA_FACTOR, Integer.parseInt(conf.get("dfs.replication")));

  assertEquals(sourceFiles, options.getSourcePaths());
  assertTrue(options.shouldSkipCRC());
  assertTrue(options.shouldSyncFolder());

  final String targetLocation = options.getTargetPath().toString();
  assertTrue(targetLocation.startsWith("hdfs://" + NAME_NODE + HDFS_DIR));
  if (numPartition == 1) {
    assertTrue(targetLocation.endsWith(TerrapinUtil.formatPartitionName(0)));
  }
  return distCp;
}
 
Example #6
Source File: MapReduceBackupCopyJob.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the source paths from the options object stored in the given field of
 * this instance.
 *
 * <p>If the field holds a {@link DistCpOptions} its getter is called directly.
 * Otherwise the value is treated as an
 * {@code org.apache.hadoop.tools.DistCpContext} and {@code getSourcePaths} is
 * invoked reflectively.
 *
 * @param fieldInputOptions reflective handle on the field holding the options
 * @return the source paths reported by the options object
 * @throws IOException wrapping any reflection failure
 */
@SuppressWarnings("unchecked")
private List<Path> getSourcePaths(Field fieldInputOptions) throws IOException{
  try {
    final Object inputOptions = fieldInputOptions.get(this);
    if (!(inputOptions instanceof DistCpOptions)) {
      // Hadoop 3
      final Method sourcePathsGetter = Class
          .forName("org.apache.hadoop.tools.DistCpContext")
          .getDeclaredMethod("getSourcePaths");
      sourcePathsGetter.setAccessible(true);
      return (List<Path>) sourcePathsGetter.invoke(inputOptions);
    }
    return ((DistCpOptions) inputOptions).getSourcePaths();
  } catch (IllegalArgumentException | IllegalAccessException |
            ClassNotFoundException | NoSuchMethodException |
            SecurityException | InvocationTargetException e) {
    throw new IOException(e);
  }
}
 
Example #7
Source File: TestCopyMapper.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Maps every path in {@code pathList} with no attributes preserved, after
 * creating source data with a different block size, and expects the copy to
 * fail with a message that mentions both "pb" and "skipCrc".
 */
@Test(timeout=40000)
public void testCopyFailOnBlockSizeDifference() {
  try {
    deleteState();
    createSourceDataWithDifferentBlockSize();

    final FileSystem sourceFs = cluster.getFileSystem();
    final CopyMapper mapper = new CopyMapper();
    final Mapper<Text, CopyListingFileStatus, Text, Text>.Context mapperContext
        = new StubContext(getConfiguration(), null, 0).getContext();

    // Configure the mapper with an empty set of preserved attributes.
    final EnumSet<DistCpOptions.FileAttribute> preserved
        = EnumSet.noneOf(DistCpOptions.FileAttribute.class);
    mapperContext.getConfiguration().set(
        DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
        DistCpUtils.packAttributes(preserved));

    mapper.setup(mapperContext);

    for (Path sourceFile : pathList) {
      final Text relativePath = new Text(
          DistCpUtils.getRelativePath(new Path(SOURCE_PATH), sourceFile));
      mapper.map(relativePath,
          new CopyListingFileStatus(sourceFs.getFileStatus(sourceFile)), mapperContext);
    }

    Assert.fail("Copy should have failed because of block-size difference.");
  }
  catch (Exception exception) {
    // Check that the exception suggests the use of -pb/-skipCrc.
    // The message of interest sits two causes down from the caught exception.
    final String rootMessage = exception.getCause().getCause().getMessage();
    Assert.assertTrue("Failure exception should have suggested the use of -pb.", rootMessage.contains("pb"));
    Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.", rootMessage.contains("skipCrc"));
  }
}
 
Example #8
Source File: TestCopyMapper.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Expects mapping files whose source data was created with a different block
 * size to fail, and checks that the failure message points at the "pb" and
 * "skipCrc" options.
 */
@Test(timeout=40000)
public void testCopyFailOnBlockSizeDifference() {
  try {
    deleteState();
    createSourceDataWithDifferentBlockSize();

    final FileSystem sourceFs = cluster.getFileSystem();
    final CopyMapper mapper = new CopyMapper();
    final Mapper<Text, CopyListingFileStatus, Text, Text>.Context mapperContext
        = new StubContext(getConfiguration(), null, 0).getContext();

    // No file attribute is preserved for this run.
    final EnumSet<DistCpOptions.FileAttribute> preserved
        = EnumSet.noneOf(DistCpOptions.FileAttribute.class);
    mapperContext.getConfiguration().set(
        DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
        DistCpUtils.packAttributes(preserved));

    mapper.setup(mapperContext);

    for (Path sourceFile : pathList) {
      final Text relativePath = new Text(
          DistCpUtils.getRelativePath(new Path(SOURCE_PATH), sourceFile));
      mapper.map(relativePath,
          new CopyListingFileStatus(sourceFs.getFileStatus(sourceFile)), mapperContext);
    }

    Assert.fail("Copy should have failed because of block-size difference.");
  }
  catch (Exception exception) {
    // Check that the exception suggests the use of -pb/-skipCrc.
    final String rootMessage = exception.getCause().getCause().getMessage();
    Assert.assertTrue("Failure exception should have suggested the use of -pb.", rootMessage.contains("pb"));
    Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.", rootMessage.contains("skipCrc"));
  }
}
 
Example #9
Source File: TestUniformSizeInputFormat.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Builds DistCp options that copy {@code /tmp/source} to {@code /tmp/target}
 * on the test cluster's file system.
 *
 * @param nMaps value passed to {@code setMaxMaps}
 * @return options with a single source path and the given map limit
 * @throws Exception if the cluster's file system cannot be obtained
 */
private static DistCpOptions getOptions(int nMaps) throws Exception {
  final List<Path> sources = new ArrayList<Path>();
  sources.add(new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/source"));
  final Path target = new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/target");

  final DistCpOptions options = new DistCpOptions(sources, target);
  options.setMaxMaps(nMaps);
  return options;
}
 
Example #10
Source File: TestDynamicInputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Builds DistCp options that copy {@code /tmp/source} to {@code /tmp/target}
 * on the test cluster's file system, with the map limit set to
 * {@code NUM_SPLITS}.
 *
 * @return options with a single source path
 * @throws Exception if the cluster's file system cannot be obtained
 */
private static DistCpOptions getOptions() throws Exception {
  final List<Path> sources = new ArrayList<Path>();
  sources.add(new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/source"));
  final Path target = new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/target");

  final DistCpOptions distCpOptions = new DistCpOptions(sources, target);
  distCpOptions.setMaxMaps(NUM_SPLITS);
  return distCpOptions;
}
 
Example #11
Source File: TestUniformSizeInputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Creates DistCp options with one source ({@code /tmp/source}) and one target
 * ({@code /tmp/target}), both resolved against the test cluster's URI.
 *
 * @param nMaps value passed to {@code setMaxMaps}
 * @return the configured options
 * @throws Exception if the cluster's file system cannot be obtained
 */
private static DistCpOptions getOptions(int nMaps) throws Exception {
  final List<Path> sources = new ArrayList<Path>();
  sources.add(new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/source"));
  final Path target = new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/target");

  final DistCpOptions options = new DistCpOptions(sources, target);
  options.setMaxMaps(nMaps);
  return options;
}
 
Example #12
Source File: DistCpCopier.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the copier options into {@link DistCpOptions}.
 *
 * <p>When explicit source data locations are present they are used as the
 * DistCp sources; otherwise the source base location is used as the single
 * source.
 *
 * @param copierOptions raw copier options handed to the parser
 * @return the parsed DistCp options
 */
private DistCpOptions parseCopierOptions(Map<String, Object> copierOptions) {
  if (!sourceDataLocations.isEmpty()) {
    LOG.debug("Will copy {} sub-paths.", sourceDataLocations.size());
    return new DistCpOptionsParser(sourceDataLocations, replicaDataLocation)
        .parse(copierOptions);
  }

  LOG.debug("Will copy all sub-paths.");
  return new DistCpOptionsParser(singletonList(sourceDataBaseLocation), replicaDataLocation)
      .parse(copierOptions);
}
 
Example #13
Source File: TestDynamicInputFormat.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Creates DistCp options with one source ({@code /tmp/source}) and one target
 * ({@code /tmp/target}) on the test cluster, limited to {@code NUM_SPLITS} maps.
 *
 * @return the configured options
 * @throws Exception if the cluster's file system cannot be obtained
 */
private static DistCpOptions getOptions() throws Exception {
  final List<Path> sources = new ArrayList<Path>();
  sources.add(new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/source"));
  final Path target = new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/target");

  final DistCpOptions distCpOptions = new DistCpOptions(sources, target);
  distCpOptions.setMaxMaps(NUM_SPLITS);
  return distCpOptions;
}
 
Example #14
Source File: DistCpOptionsParserTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a fully populated option map and checks that each supplied value
 * shows up on the resulting {@link DistCpOptions}, while options that were not
 * supplied keep the values asserted below.
 */
@Test
public void typical() {
  final Map<String, Object> copierOptions = defaultOptions();
  copierOptions.put(FILE_ATTRIBUTES, Arrays.asList("replication", "blocksize", "user", "group", "permission",
      "checksumtype", "acl", "xattr", "times"));
  copierOptions.put(PRESERVE_RAW_XATTRS, "true");
  copierOptions.put(ATOMIC_COMMIT, "false");
  copierOptions.put(ATOMIC_WORK_PATH, "atomic-work-path");
  copierOptions.put(COPY_STRATEGY, "copy-strategy");
  copierOptions.put(IGNORE_FAILURES, "true");
  copierOptions.put(LOG_PATH, "log-path");
  copierOptions.put(TASK_BANDWIDTH, "500");
  copierOptions.put(MAX_MAPS, "2");
  copierOptions.put(SKIP_CRC, "false");
  copierOptions.put(SSL_CONFIGURATION_FILE, "ssl-configuration-file");

  final DistCpOptions parsed = parser.parse(copierOptions);

  // Every attribute of the enum must be marked for preservation.
  for (FileAttribute fileAttribute : FileAttribute.values()) {
    assertThat(parsed.shouldPreserve(fileAttribute), is(true));
  }
  assertThat(parsed.shouldPreserveRawXattrs(), is(true));
  assertThat(parsed.shouldAppend(), is(false));
  assertThat(parsed.shouldAtomicCommit(), is(false));
  assertThat(parsed.getAtomicWorkPath(), is(new Path("atomic-work-path")));
  assertThat(parsed.shouldBlock(), is(true));
  assertThat(parsed.getCopyStrategy(), is("copy-strategy"));
  assertThat(parsed.shouldDeleteMissing(), is(false));
  assertThat(parsed.shouldIgnoreFailures(), is(true));
  assertThat(parsed.getLogPath(), is(new Path("log-path")));
  assertThat(parsed.getMapBandwidth(), is(500));
  assertThat(parsed.getMaxMaps(), is(2));
  assertThat(parsed.shouldOverwrite(), is(false));
  assertThat(parsed.shouldSkipCRC(), is(false));
  assertThat(parsed.getSslConfigurationFile(), is("ssl-configuration-file"));
  assertThat(parsed.shouldSyncFolder(), is(false));
  assertThat(parsed.getTargetPathExists(), is(true));
}
 
Example #15
Source File: CubeMigrationCrossClusterCLI.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether the target path of {@code inputOptions} exists on the
 * destination cluster and records the answer both on the options object and
 * in the destination job configuration, for the benefit of CopyCommitter.
 *
 * @param inputOptions DistCp options whose target path is probed and updated
 * @throws IOException if the target file system cannot be resolved or queried
 */
public void setTargetPathExists(DistCpOptions inputOptions) throws IOException {
    final Path targetPath = inputOptions.getTargetPath();
    final boolean exists = targetPath.getFileSystem(dstCluster.jobConf).exists(targetPath);

    inputOptions.setTargetPathExists(exists);
    dstCluster.jobConf.setBoolean(DistCpConstants.CONF_LABEL_TARGET_PATH_EXISTS, exists);
}
 
Example #16
Source File: CircusTrainCopyListingTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a listing for {@code input/sub1/sub2} (containing one file,
 * {@code data}) with {@code input} as the root path, then reads the listing
 * back and expects exactly two records: the directory and the file, keyed by
 * their path relative to the root.
 */
@Test
public void typical() throws IOException {
  final File inputRoot = temp.newFolder("input");
  final File nestedDir = new File(inputRoot, "sub1/sub2");
  nestedDir.mkdirs();
  Files.asCharSink(new File(nestedDir, "data"), UTF_8).write("test1");

  final Path listingPath = new Path(temp.newFile("listFile").toURI());

  final List<Path> sources = new ArrayList<>();
  sources.add(new Path(nestedDir.toURI()));
  final DistCpOptions distCpOptions = new DistCpOptions(sources, new Path("dummy"));

  CircusTrainCopyListing.setRootPath(conf, new Path(inputRoot.toURI()));
  new CircusTrainCopyListing(conf, null).doBuildListing(listingPath, distCpOptions);

  try (Reader listingReader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(listingPath))) {
    final Text relativePath = new Text();
    final CopyListingFileStatus status = new CopyListingFileStatus();

    // First record: the source directory itself.
    assertTrue(listingReader.next(relativePath, status));
    assertThat(relativePath.toString(), is("/sub1/sub2"));
    assertThat(status.getPath().toUri().toString(), endsWith("/input/sub1/sub2"));

    // Second record: the file inside it.
    assertTrue(listingReader.next(relativePath, status));
    assertThat(relativePath.toString(), is("/sub1/sub2/data"));
    assertThat(status.getPath().toUri().toString(), endsWith("/input/sub1/sub2/data"));

    // Nothing else may follow.
    assertFalse(listingReader.next(relativePath, status));
  }
}
 
Example #17
Source File: CubeMigrationCrossClusterCLI.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Copies an HDFS directory with DistCp, preserving block size and running in
 * blocking mode.
 *
 * @param srcDir source directory handed to the DistCp option parser
 * @param srcConf not read by this method
 * @param dstDir destination directory handed to the DistCp option parser
 * @param dstConf not read by this method
 * @throws Exception if option setup or the DistCp execution fails
 */
protected void copyHDFSPath(String srcDir, Configuration srcConf, String dstDir, Configuration dstConf)
        throws Exception {
    logger.info("start to copy hdfs directory from {} to {}", srcDir, dstDir);

    final String[] distCpArgs = { srcDir, dstDir };
    final DistCpOptions copyOptions = OptionsParser.parse(distCpArgs);
    copyOptions.preserve(DistCpOptions.FileAttribute.BLOCKSIZE);
    copyOptions.setBlocking(true);
    setTargetPathExists(copyOptions);

    new DistCp(getConfOfDistCp(), copyOptions).execute();

    logger.info("copied hdfs directory from {} to {}", srcDir, dstDir);
}
 
Example #18
Source File: DistCpCopier.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Runs DistCp from the source base location to the replica location and
 * returns the job's metrics.
 *
 * <p>The job is submitted in non-blocking mode so that running-job metrics can
 * be registered, then awaited explicitly. On any failure the replica data
 * location is cleaned up before the error is rethrown.
 *
 * @return metrics built from the finished job and the
 *     {@code <SCHEME>_BYTES_WRITTEN} file-system counter
 * @throws CircusTrainException if the job cannot be run or reports failure
 */
@Override
public Metrics copy() throws CircusTrainException {
  LOG.info("Copying table data.");
  LOG.debug("Invoking DistCp: {} -> {}", sourceDataBaseLocation, replicaDataLocation);

  final DistCpOptions options = parseCopierOptions(copierOptions);
  LOG.debug("Invoking DistCp with options: {}", options);

  CircusTrainCopyListing.setAsCopyListingClass(conf);
  CircusTrainCopyListing.setRootPath(conf, sourceDataBaseLocation);

  try {
    options.setBlocking(false);
    final Job distCpJob = executor.exec(conf, options);

    // Counter name is derived from the upper-cased scheme of the replica location.
    final String upperScheme = replicaDataLocation.toUri().getScheme().toUpperCase(Locale.ROOT);
    final String bytesWrittenCounter = upperScheme + "_BYTES_WRITTEN";
    registerRunningJobMetrics(distCpJob, bytesWrittenCounter);

    if (distCpJob.waitForCompletion(true)) {
      return new JobMetrics(distCpJob, FileSystemCounter.class.getName(), bytesWrittenCounter);
    }
    throw new IOException(
        "DistCp failure: Job " + distCpJob.getJobID() + " has failed: " + distCpJob.getStatus().getFailureInfo());
  } catch (Exception e) {
    cleanUpReplicaDataLocation();
    throw new CircusTrainException("Unable to copy file(s)", e);
  }
}
 
Example #19
Source File: TestCopyCommitter.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that committing with sync-folder and delete-missing enabled removes
 * target files that have no counterpart in the source, for a flat directory
 * whose file names interleave between source and target.
 *
 * <p>Source holds 1,3,4,5,7,8,9 and target holds 2,4,5,7,9,A; after the commit
 * the target is expected to contain exactly the four shared names. The commit
 * is run twice to check that it is idempotent.
 */
@Test
public void testDeleteMissingFlatInterleavedFiles() {
  TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
  JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
      taskAttemptContext.getTaskAttemptID().getJobID());
  Configuration conf = jobContext.getConfiguration();

  FileSystem fs = null;
  try {
    OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
    fs = FileSystem.get(conf);
    String sourceBase = "/tmp1/" + rand.nextLong();
    String targetBase = "/tmp1/" + rand.nextLong();

    for (String name : new String[] {"1", "3", "4", "5", "7", "8", "9"}) {
      TestDistCpUtils.createFile(fs, sourceBase + "/" + name);
    }
    for (String name : new String[] {"2", "4", "5", "7", "9", "A"}) {
      TestDistCpUtils.createFile(fs, targetBase + "/" + name);
    }

    DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)),
        new Path("/out"));
    options.setSyncFolder(true);
    options.setDeleteMissing(true);
    options.appendToConf(conf);

    CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
    Path listingFile = new Path("/tmp1/" + rand.nextLong());
    listing.buildListing(listingFile, options);

    conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
    conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);

    // The second pass tests for an idempotent commit.
    for (int pass = 0; pass < 2; pass++) {
      committer.commitJob(jobContext);
      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
        Assert.fail("Source and target folders are not in sync");
      }
      // Expected value goes first so a failure message reports it correctly.
      Assert.assertEquals(4, fs.listStatus(new Path(targetBase)).length);
    }
  } catch (IOException e) {
    LOG.error("Exception encountered while testing for delete missing", e);
    Assert.fail("Delete missing failure");
  } finally {
    TestDistCpUtils.delete(fs, "/tmp1");
    conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
  }

}
 
Example #20
Source File: TestDynamicInputFormat.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a copy listing, asks {@link DynamicInputFormat} for its splits and
 * reads every split through a record reader, checking that each listed path is
 * expected, that progress never decreases and stays within [0, 1], and that the
 * total number of records matches {@code expectedFilePaths}.
 */
@Test
public void testGetSplits() throws Exception {
  final DistCpOptions distCpOptions = getOptions();
  final Configuration jobConf = new Configuration();
  jobConf.set("mapred.map.tasks",
              String.valueOf(distCpOptions.getMaxMaps()));

  final Path listingPath = new Path(cluster.getFileSystem().getUri().toString()
      +"/tmp/testDynInputFormat/fileList.seq");
  CopyListing.getCopyListing(jobConf, CREDENTIALS, distCpOptions)
      .buildListing(listingPath, distCpOptions);

  final JobContext jobContext = new JobContextImpl(jobConf, new JobID());
  final DynamicInputFormat<Text, CopyListingFileStatus> dynamicFormat =
      new DynamicInputFormat<Text, CopyListingFileStatus>();
  final List<InputSplit> splits = dynamicFormat.getSplits(jobContext);

  int filesSeen = 0;

  // The loop index doubles as the task id handed to the stub context.
  for (int taskId = 0; taskId < splits.size(); taskId++) {
    final RecordReader<Text, CopyListingFileStatus> reader =
        dynamicFormat.createRecordReader(splits.get(taskId), null);
    final TaskAttemptContext attemptContext =
        new StubContext(jobContext.getConfiguration(), reader, taskId).getContext();

    // NOTE(review): the reader is always initialised with the first split, not
    // the current one -- kept as in the original; confirm whether intentional.
    reader.initialize(splits.get(0), attemptContext);

    float lastProgress = 0f;
    while (reader.nextKeyValue()) {
      final String source = reader.getCurrentValue().getPath().toString();
      System.out.println(source);
      Assert.assertTrue(expectedFilePaths.contains(source));

      final float progress = reader.getProgress();
      Assert.assertTrue(progress >= lastProgress);
      Assert.assertTrue(progress >= 0.0f);
      Assert.assertTrue(progress <= 1.0f);
      lastProgress = progress;
      filesSeen++;
    }
    Assert.assertTrue(reader.getProgress() == 1.0f);
  }

  Assert.assertEquals(expectedFilePaths.size(), filesSeen);
}
 
Example #21
Source File: TestCopyCommitter.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Checks delete-missing behaviour of the committer on a flat directory where
 * source (1,3,4,5,7,8,9) and target (2,4,5,7,9,A) file names interleave.
 *
 * <p>After committing, source and target must be in sync and the target must
 * hold exactly four entries. The commit is repeated once to confirm it is
 * idempotent.
 */
@Test
public void testDeleteMissingFlatInterleavedFiles() {
  TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
  JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
      taskAttemptContext.getTaskAttemptID().getJobID());
  Configuration conf = jobContext.getConfiguration();

  FileSystem fs = null;
  try {
    OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
    fs = FileSystem.get(conf);
    String sourceBase = "/tmp1/" + rand.nextLong();
    String targetBase = "/tmp1/" + rand.nextLong();

    for (String name : new String[] {"1", "3", "4", "5", "7", "8", "9"}) {
      TestDistCpUtils.createFile(fs, sourceBase + "/" + name);
    }
    for (String name : new String[] {"2", "4", "5", "7", "9", "A"}) {
      TestDistCpUtils.createFile(fs, targetBase + "/" + name);
    }

    DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)),
        new Path("/out"));
    options.setSyncFolder(true);
    options.setDeleteMissing(true);
    options.appendToConf(conf);

    CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
    Path listingFile = new Path("/tmp1/" + rand.nextLong());
    listing.buildListing(listingFile, options);

    conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
    conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);

    // The second pass tests for an idempotent commit.
    for (int pass = 0; pass < 2; pass++) {
      committer.commitJob(jobContext);
      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
        Assert.fail("Source and target folders are not in sync");
      }
      // JUnit's signature is (expected, actual); the original had them swapped.
      Assert.assertEquals(4, fs.listStatus(new Path(targetBase)).length);
    }
  } catch (IOException e) {
    LOG.error("Exception encountered while testing for delete missing", e);
    Assert.fail("Delete missing failure");
  } finally {
    TestDistCpUtils.delete(fs, "/tmp1");
    conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
  }

}
 
Example #22
Source File: BaseUploader.java    From terrapin with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the {@link DistCp} instance used for the upload. Kept as a separate
 * protected method so that tests can override it.
 *
 * @param conf configuration passed to the DistCp constructor
 * @param options options passed to the DistCp constructor
 * @return a new DistCp built from the two arguments
 * @throws Exception if the DistCp constructor fails
 */
@VisibleForTesting
protected DistCp getDistCp(Configuration conf, DistCpOptions options) throws Exception {
  final DistCp distCp = new DistCp(conf, options);
  return distCp;
}
 
Example #23
Source File: TestCopyCommitter.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Commits a job with sync-folder and delete-missing enabled and checks that
 * source and target trees are in sync in both directions afterwards. The
 * commit runs twice to confirm that repeating it changes nothing.
 */
@Test
public void testDeleteMissing() {
  final TaskAttemptContext attemptContext = getTaskAttemptContext(config);
  final JobContext jobContext = new JobContextImpl(attemptContext.getConfiguration(),
      attemptContext.getTaskAttemptID().getJobID());
  final Configuration conf = jobContext.getConfiguration();

  FileSystem fs = null;
  try {
    final OutputCommitter committer = new CopyCommitter(null, attemptContext);
    fs = FileSystem.get(conf);

    final String sourceBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
    final String targetBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
    // A third test tree is renamed onto the target base path.
    final String extraTargetTree = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
    fs.rename(new Path(extraTargetTree), new Path(targetBase));

    final DistCpOptions distCpOptions = new DistCpOptions(
        Arrays.asList(new Path(sourceBase)), new Path("/out"));
    distCpOptions.setSyncFolder(true);
    distCpOptions.setDeleteMissing(true);
    distCpOptions.appendToConf(conf);

    final Path listingFile = new Path("/tmp1/" + rand.nextLong());
    new GlobbedCopyListing(conf, CREDENTIALS).buildListing(listingFile, distCpOptions);

    conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
    conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);

    // Second pass: test for idempotent commit.
    for (int pass = 0; pass < 2; pass++) {
      committer.commitJob(jobContext);
      final boolean inSyncBothWays =
          TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)
              && TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase);
      if (!inSyncBothWays) {
        Assert.fail("Source and target folders are not in sync");
      }
    }
  } catch (Throwable e) {
    LOG.error("Exception encountered while testing for delete missing", e);
    Assert.fail("Delete missing failure");
  } finally {
    TestDistCpUtils.delete(fs, "/tmp1");
    conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
  }
}
 
Example #24
Source File: TestCopyCommitter.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Commits a job with permission preservation enabled and checks that the
 * target directories end up with the source permission. The commit runs twice
 * to confirm that repeating it changes nothing.
 */
@Test
public void testPreserveStatus() {
  final TaskAttemptContext attemptContext = getTaskAttemptContext(config);
  final JobContext jobContext = new JobContextImpl(attemptContext.getConfiguration(),
      attemptContext.getTaskAttemptID().getJobID());
  final Configuration conf = jobContext.getConfiguration();

  FileSystem fs = null;
  try {
    final OutputCommitter committer = new CopyCommitter(null, attemptContext);
    fs = FileSystem.get(conf);

    // 511 is octal 0777, 448 is octal 0700.
    final FsPermission sourcePerm = new FsPermission((short) 511);
    final FsPermission initialPerm = new FsPermission((short) 448);
    final String sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm);
    final String targetBase = TestDistCpUtils.createTestSetup(fs, initialPerm);

    final DistCpOptions distCpOptions = new DistCpOptions(
        Arrays.asList(new Path(sourceBase)), new Path("/out"));
    distCpOptions.preserve(FileAttribute.PERMISSION);
    distCpOptions.appendToConf(conf);
    // Set after appendToConf, exactly as in the original ordering.
    distCpOptions.setTargetPathExists(false);

    final Path listingFile = new Path("/tmp1/" + rand.nextLong());
    new GlobbedCopyListing(conf, CREDENTIALS).buildListing(listingFile, distCpOptions);

    conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);

    // Second pass: test for idempotent commit.
    for (int pass = 0; pass < 2; pass++) {
      committer.commitJob(jobContext);
      if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
        Assert.fail("Permission don't match");
      }
    }

  } catch (IOException e) {
    LOG.error("Exception encountered while testing for preserve status", e);
    Assert.fail("Preserve status failure");
  } finally {
    TestDistCpUtils.delete(fs, "/tmp1");
    conf.unset(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
  }

}
 
Example #25
Source File: TestUniformSizeInputFormat.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a copy listing for {@code nMaps} maps, asks
 * {@link UniformSizeInputFormat} for its splits, and checks that the file
 * lengths read back through every split add up to {@code totalFileSize}.
 *
 * @param nMaps number of maps passed to {@code getOptions}; also the divisor
 *              used to derive the expected size per map
 * @throws Exception if building the listing or reading a split fails
 */
public void testGetSplits(int nMaps) throws Exception {
  DistCpOptions options = getOptions(nMaps);
  Configuration configuration = new Configuration();
  configuration.set("mapred.map.tasks",
                    String.valueOf(options.getMaxMaps()));
  Path listFile = new Path(cluster.getFileSystem().getUri().toString()
      + "/tmp/testGetSplits_1/fileList.seq");
  CopyListing.getCopyListing(configuration, CREDENTIALS, options).
      buildListing(listFile, options);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());
  UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
  List<InputSplit> splits
          = uniformSizeInputFormat.getSplits(jobContext);

  int sizePerMap = totalFileSize/nMaps;

  checkSplits(listFile, splits);

  int doubleCheckedTotalSize = 0;
  int previousSplitSize = -1;
  for (int i=0; i<splits.size(); ++i) {
    InputSplit split = splits.get(i);
    int currentSplitSize = 0;
    RecordReader<Text, CopyListingFileStatus> recordReader =
      uniformSizeInputFormat.createRecordReader(split, null);
    StubContext stubContext = new StubContext(jobContext.getConfiguration(),
                                              recordReader, 0);
    final TaskAttemptContext taskAttemptContext
       = stubContext.getContext();
    recordReader.initialize(split, taskAttemptContext);
    try {
      while (recordReader.nextKeyValue()) {
        Path sourcePath = recordReader.getCurrentValue().getPath();
        FileSystem fs = sourcePath.getFileSystem(configuration);
        FileStatus fileStatus [] = fs.listStatus(sourcePath);
        // Only a listing with exactly one entry contributes a length. A
        // multi-entry listing is skipped as before, and an empty listing is
        // now skipped too instead of failing on fileStatus[0].
        if (fileStatus.length != 1) {
          continue;
        }
        currentSplitSize += fileStatus[0].getLen();
      }
    } finally {
      // The reader was previously left open for every split.
      recordReader.close();
    }
    // NOTE(review): previousSplitSize is never reassigned inside this loop,
    // so the first disjunct is always true and this uniformity check cannot
    // fail. Confirm the 10% tolerance really holds for the generated data
    // before activating it with "previousSplitSize = currentSplitSize".
    Assert.assertTrue(
         previousSplitSize == -1
             || Math.abs(currentSplitSize - previousSplitSize) < 0.1*sizePerMap
             || i == splits.size()-1);

    doubleCheckedTotalSize += currentSplitSize;
  }

  Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}
 
Example #26
Source File: MapReduceBackupCopyJob.java, from hbase (Apache License 2.0), 4 votes
/**
 * Creates the DistCp job wrapper and remembers the backup bookkeeping
 * objects handed in by the caller.
 *
 * @param conf          configuration forwarded to the superclass constructor
 * @param options       DistCp options forwarded to the superclass constructor
 * @param backupInfo    stored in {@code this.backupInfo}
 * @param backupManager stored in {@code this.backupManager}
 * @throws Exception declared by this constructor; the only call made here is
 *                   the superclass constructor
 */
public BackupDistCp(Configuration conf, DistCpOptions options, BackupInfo backupInfo,
    BackupManager backupManager) throws Exception {
  super(conf, options);
  // The two field assignments are independent of each other.
  this.backupManager = backupManager;
  this.backupInfo = backupInfo;
}
 
Example #27
Source File: TestCopyMapper.java, from big-c (Apache License 2.0), 4 votes
/**
 * Runs {@link CopyMapper} over every path in {@code pathList} and verifies
 * the copy, the COPY and BYTESCOPIED counters, and that the records written
 * after {@code testCopyingExistingFiles} all start with "SKIP:".
 *
 * @param preserveChecksum when true, source data is created through
 *        {@code createSourceDataWithDifferentChecksumType()} and CHECKSUMTYPE
 *        is added to the preserved attributes
 * @throws Exception if set-up, mapping or verification fails
 */
private void testCopy(boolean preserveChecksum) throws Exception {
  deleteState();
  if (!preserveChecksum) {
    createSourceData();
  } else {
    createSourceDataWithDifferentChecksumType();
  }

  final FileSystem fs = cluster.getFileSystem();
  final CopyMapper mapper = new CopyMapper();
  final StubContext stub = new StubContext(getConfiguration(), null, 0);
  final Mapper<Text, CopyListingFileStatus, Text, Text>.Context mapContext
          = stub.getContext();

  // Publish the attributes to preserve through the mapper's configuration.
  final Configuration mapperConf = mapContext.getConfiguration();
  final EnumSet<DistCpOptions.FileAttribute> preserved = preserveChecksum
          ? EnumSet.of(DistCpOptions.FileAttribute.REPLICATION,
                       DistCpOptions.FileAttribute.CHECKSUMTYPE)
          : EnumSet.of(DistCpOptions.FileAttribute.REPLICATION);
  mapperConf.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
          DistCpUtils.packAttributes(preserved));

  mapper.setup(mapContext);

  final Path sourceRoot = new Path(SOURCE_PATH);
  for (Path sourceEntry : pathList) {
    final Text relativeKey =
        new Text(DistCpUtils.getRelativePath(sourceRoot, sourceEntry));
    final CopyListingFileStatus listingStatus =
        new CopyListingFileStatus(fs.getFileStatus(sourceEntry));
    mapper.map(relativeKey, listingStatus, mapContext);
  }

  // Check that the maps worked.
  verifyCopy(fs, preserveChecksum);
  Assert.assertEquals(pathList.size(), stub.getReporter()
      .getCounter(CopyMapper.Counter.COPY).getValue());
  if (preserveChecksum) {
    Assert.assertEquals(nFiles * NON_DEFAULT_BLOCK_SIZE * 2, stub
        .getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED)
        .getValue());
  } else {
    Assert.assertEquals(nFiles * DEFAULT_FILE_SIZE, stub
        .getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED)
        .getValue());
  }

  // Every record in the writer after this call must be a skip record.
  testCopyingExistingFiles(fs, mapper, mapContext);
  for (Text written : stub.getWriter().values()) {
    final String record = written.toString();
    Assert.assertTrue(record + " is not skipped", record.startsWith("SKIP:"));
  }
}
 
Example #28
Source File: CopyMapper.java, from big-c (Apache License 2.0), 4 votes
/**
 * Reads the packed preserve-status value from the context's configuration
 * and unpacks it into a set of file attributes.
 *
 * @param context mapper context whose configuration is consulted
 * @return whatever {@code DistCpUtils.unpackAttributes} produces for the
 *         value stored under the PRESERVE_STATUS config label
 */
private static EnumSet<DistCpOptions.FileAttribute>
        getFileAttributeSettings(Mapper.Context context) {
  final String packedAttributes = context.getConfiguration()
          .get(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel());
  return DistCpUtils.unpackAttributes(packedAttributes);
}
 
Example #29
Source File: CopyMapper.java, from big-c (Apache License 2.0), 4 votes
/**
 * Implementation of the Mapper::map(). Handles one copy-listing entry:
 * resolves the target under the target work path, refreshes the source
 * status, then creates the directory, skips the file or copies it, and
 * finally passes the selected attributes to {@code DistCpUtils.preserve}.
 *
 * @param relPath path appended to the qualified target work path to form
 *                the target
 * @param sourceFileStatus listing entry whose path is the copy source
 * @param context mapper context, used for status updates, configuration
 *                look-ups and emitting "SKIP: ..." records
 * @throws IOException declared by the signature; any IOException raised
 *         inside the copy logic is handed to {@code handleFailures}
 * @throws InterruptedException declared by the signature
 */
@Override
public void map(Text relPath, CopyListingFileStatus sourceFileStatus,
        Context context) throws IOException, InterruptedException {
  final Path sourcePath = sourceFileStatus.getPath();

  if (LOG.isDebugEnabled()) {
    LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath);
  }

  final Path qualifiedWorkPath = targetWorkPath.makeQualified(
      targetFS.getUri(), targetFS.getWorkingDirectory());
  final Path target = new Path(qualifiedWorkPath + relPath.toString());

  final EnumSet<DistCpOptions.FileAttribute> fileAttributes
          = getFileAttributeSettings(context);
  final boolean preserveRawXattrs = context.getConfiguration().getBoolean(
      DistCpConstants.CONF_LABEL_PRESERVE_RAWXATTRS, false);

  final String description = "Copying " + sourcePath + " to " + target;
  context.setStatus(description);

  LOG.info(description);

  try {
    final FileSystem sourceFS;
    final CopyListingFileStatus liveSource;
    try {
      // Re-read the source status rather than using the listing entry.
      sourceFS = sourcePath.getFileSystem(conf);
      final boolean keepXAttrs = fileAttributes.contains(FileAttribute.XATTR);
      final FileStatus freshStatus = sourceFS.getFileStatus(sourcePath);
      final boolean keepAcls = fileAttributes.contains(FileAttribute.ACL);
      liveSource = DistCpUtils.toCopyListingFileStatus(sourceFS, freshStatus,
          keepAcls, keepXAttrs, preserveRawXattrs);
    } catch (FileNotFoundException e) {
      // A vanished source is reported as a CopyReadException wrapped in an
      // IOException, which the outer catch forwards to handleFailures.
      throw new IOException(new RetriableFileCopyCommand.CopyReadException(e));
    }

    FileStatus existingTarget = null;
    try {
      existingTarget = targetFS.getFileStatus(target);
    } catch (FileNotFoundException ignore) {
      // A missing target is expected; existingTarget simply stays null.
      if (LOG.isDebugEnabled()) {
        LOG.debug("Path could not be found: " + target, ignore);
      }
    }

    // Refuse to replace a directory with a file or a file with a directory.
    final boolean typeMismatch = existingTarget != null
        && existingTarget.isDirectory() != liveSource.isDirectory();
    if (typeMismatch) {
      throw new IOException("Can't replace " + target + ". Target is " +
          getFileType(existingTarget) + ", Source is " + getFileType(liveSource));
    }

    if (liveSource.isDirectory()) {
      // Directories return here, so DistCpUtils.preserve below is not
      // called for them in this method.
      createTargetDirsWithRetry(description, target, context);
      return;
    }

    final FileAction action = checkUpdate(sourceFS, liveSource, target);
    if (action != FileAction.SKIP) {
      copyFileWithRetry(description, liveSource, target, context,
          action, fileAttributes);
    } else {
      LOG.info("Skipping copy of " + liveSource.getPath()
               + " to " + target);
      updateSkipCounters(context, liveSource);
      context.write(null, new Text("SKIP: " + liveSource.getPath()));
    }

    // Runs for skipped files as well as copied ones.
    DistCpUtils.preserve(target.getFileSystem(conf), target, liveSource,
        fileAttributes, preserveRawXattrs);
  } catch (IOException exception) {
    handleFailures(exception, sourceFileStatus, target, context);
  }
}
 
Example #30
Source File: TestDynamicInputFormat.java, from hadoop (Apache License 2.0), 4 votes
/**
 * Builds a copy listing, asks {@link DynamicInputFormat} for its splits,
 * and reads every split through its own record reader. Each source path
 * must be one of {@code expectedFilePaths}, reported progress must stay
 * within [0, 1] and never decrease, and the number of records read must
 * equal the number of expected paths.
 *
 * @throws Exception if building the listing or reading a split fails
 */
@Test
public void testGetSplits() throws Exception {
  DistCpOptions options = getOptions();
  Configuration configuration = new Configuration();
  configuration.set("mapred.map.tasks",
                    String.valueOf(options.getMaxMaps()));
  CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
          new Path(cluster.getFileSystem().getUri().toString()
                  +"/tmp/testDynInputFormat/fileList.seq"), options);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());
  DynamicInputFormat<Text, CopyListingFileStatus> inputFormat =
      new DynamicInputFormat<Text, CopyListingFileStatus>();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);

  int nFiles = 0;
  int taskId = 0;

  for (InputSplit split : splits) {
    RecordReader<Text, CopyListingFileStatus> recordReader =
         inputFormat.createRecordReader(split, null);
    StubContext stubContext = new StubContext(jobContext.getConfiguration(),
                                              recordReader, taskId);
    final TaskAttemptContext taskAttemptContext
       = stubContext.getContext();

    // Initialize with the split this reader was created for; the previous
    // code passed splits.get(0) on every iteration.
    recordReader.initialize(split, taskAttemptContext);
    try {
      float previousProgressValue = 0f;
      while (recordReader.nextKeyValue()) {
        CopyListingFileStatus fileStatus = recordReader.getCurrentValue();
        String source = fileStatus.getPath().toString();
        System.out.println(source);
        Assert.assertTrue(expectedFilePaths.contains(source));
        final float progress = recordReader.getProgress();
        Assert.assertTrue(progress >= previousProgressValue);
        Assert.assertTrue(progress >= 0.0f);
        Assert.assertTrue(progress <= 1.0f);
        previousProgressValue = progress;
        ++nFiles;
      }
      Assert.assertTrue(recordReader.getProgress() == 1.0f);
    } finally {
      // The reader was previously left open for every split.
      recordReader.close();
    }

    ++taskId;
  }

  Assert.assertEquals(expectedFilePaths.size(), nFiles);
}