org.apache.hadoop.mapreduce.JobSubmissionFiles Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.JobSubmissionFiles. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: MRInputHelpers.java From tez with Apache License 2.0

6 votes

/**
 * Generate old-api mapred InputFormat splits
 * @param jobConf JobConf required by InputFormat class
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated to be used to determining parallelism of
 * the map stage.
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf,
                                                 Path inputSplitDir) throws IOException {

  org.apache.hadoop.mapred.InputSplit[] splits =
      generateOldSplits(jobConf, false, true, 0);

  JobSplitWriter.createSplitFiles(inputSplitDir, jobConf,
      inputSplitDir.getFileSystem(jobConf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        TaskLocationHint.createTaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null)
    );
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobConf.getCredentials());
}

Example #2

Source File: MRHelpers.java From incubator-tez with Apache License 2.0

6 votes

/**
 * Generate old-api mapred InputFormat splits
 * @param jobConf JobConf required by InputFormat class
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated to be used to determining parallelism of
 * the map stage.
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf,
    Path inputSplitDir) throws IOException {
  
  org.apache.hadoop.mapred.InputSplit[] splits = 
      generateOldSplits(jobConf, null, 0);
  
  JobSplitWriter.createSplitFiles(inputSplitDir, jobConf,
      inputSplitDir.getFileSystem(jobConf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        new TaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null));
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobConf.getCredentials());
}

Example #3

Source File: MRHelpers.java From incubator-tez with Apache License 2.0

6 votes

/**
 * Generate new-api mapreduce InputFormat splits
 * @param jobContext JobContext required by InputFormat
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * location hints for each split generated to be used to determining parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext,
    Path inputSplitDir) throws IOException, InterruptedException,
    ClassNotFoundException {
  
  org.apache.hadoop.mapreduce.InputSplit[] splits = 
      generateNewSplits(jobContext, null, 0);
  
  Configuration conf = jobContext.getConfiguration();

  JobSplitWriter.createSplitFiles(inputSplitDir, conf,
      inputSplitDir.getFileSystem(conf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        new TaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null));
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobContext.getCredentials());
}

Example #4

Source File: TestIntegration.java From big-c with Apache License 2.0

6 votes

@Test(timeout=100000)
public void testCleanup() {
  try {
    Path sourcePath = new Path("noscheme:///file");
    List<Path> sources = new ArrayList<Path>();
    sources.add(sourcePath);

    DistCpOptions options = new DistCpOptions(sources, target);

    Configuration conf = getConf();
    Path stagingDir = JobSubmissionFiles.getStagingDir(
            new Cluster(conf), conf);
    stagingDir.getFileSystem(conf).mkdirs(stagingDir);

    try {
      new DistCp(conf, options).execute();
    } catch (Throwable t) {
      Assert.assertEquals(stagingDir.getFileSystem(conf).
          listStatus(stagingDir).length, 0);
    }
  } catch (Exception e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("testCleanup failed " + e.getMessage());
  }
}

Example #5

Source File: TestExternalCall.java From hadoop with Apache License 2.0

6 votes

/**
* test methods run end execute of DistCp class. silple copy file
* @throws Exception 
*/
 @Test
 public void testCleanup() throws Exception {

     Configuration conf = getConf();

     Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf),
         conf);
     stagingDir.getFileSystem(conf).mkdirs(stagingDir);
     Path soure = createFile("tmp.txt");
     Path target = createFile("target.txt");

     DistCp distcp = new DistCp(conf, null);
     String[] arg = { soure.toString(), target.toString() };

     distcp.run(arg);
     Assert.assertTrue(fs.exists(target));

 
 }

Example #6

Source File: TestExternalCall.java From hadoop with Apache License 2.0

6 votes

/**
 * test main method of DistCp. Method should to call System.exit().
 * 
 */
@Test
public void testCleanupTestViaToolRunner() throws IOException, InterruptedException {

  Configuration conf = getConf();

  Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
  stagingDir.getFileSystem(conf).mkdirs(stagingDir);
 
  Path soure = createFile("tmp.txt");
  Path target = createFile("target.txt");
  try {

    String[] arg = {target.toString(),soure.toString()};
    DistCp.main(arg);
    Assert.fail();

  } catch (ExitException t) {
    Assert.assertTrue(fs.exists(target));
    Assert.assertEquals(t.status, 0);
    Assert.assertEquals(
        stagingDir.getFileSystem(conf).listStatus(stagingDir).length, 0);
  }

}

Example #7

Source File: TestIntegration.java From hadoop with Apache License 2.0

6 votes

@Test(timeout=100000)
public void testCleanup() {
  try {
    Path sourcePath = new Path("noscheme:///file");
    List<Path> sources = new ArrayList<Path>();
    sources.add(sourcePath);

    DistCpOptions options = new DistCpOptions(sources, target);

    Configuration conf = getConf();
    Path stagingDir = JobSubmissionFiles.getStagingDir(
            new Cluster(conf), conf);
    stagingDir.getFileSystem(conf).mkdirs(stagingDir);

    try {
      new DistCp(conf, options).execute();
    } catch (Throwable t) {
      Assert.assertEquals(stagingDir.getFileSystem(conf).
          listStatus(stagingDir).length, 0);
    }
  } catch (Exception e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("testCleanup failed " + e.getMessage());
  }
}

Example #8

Source File: TestExternalCall.java From big-c with Apache License 2.0

6 votes

/**
 * test main method of DistCp. Method should to call System.exit().
 * 
 */
@Test
public void testCleanupTestViaToolRunner() throws IOException, InterruptedException {

  Configuration conf = getConf();

  Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
  stagingDir.getFileSystem(conf).mkdirs(stagingDir);
 
  Path soure = createFile("tmp.txt");
  Path target = createFile("target.txt");
  try {

    String[] arg = {target.toString(),soure.toString()};
    DistCp.main(arg);
    Assert.fail();

  } catch (ExitException t) {
    Assert.assertTrue(fs.exists(target));
    Assert.assertEquals(t.status, 0);
    Assert.assertEquals(
        stagingDir.getFileSystem(conf).listStatus(stagingDir).length, 0);
  }

}

Example #9

Source File: TestExternalCall.java From big-c with Apache License 2.0

6 votes

/**
* test methods run end execute of DistCp class. silple copy file
* @throws Exception 
*/
 @Test
 public void testCleanup() throws Exception {

     Configuration conf = getConf();

     Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf),
         conf);
     stagingDir.getFileSystem(conf).mkdirs(stagingDir);
     Path soure = createFile("tmp.txt");
     Path target = createFile("target.txt");

     DistCp distcp = new DistCp(conf, null);
     String[] arg = { soure.toString(), target.toString() };

     distcp.run(arg);
     Assert.assertTrue(fs.exists(target));

 
 }

Example #10

Source File: JobSplitWriter.java From big-c with Apache License 2.0

5 votes

public static void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, 
    org.apache.hadoop.mapred.InputSplit[] splits) 
throws IOException {
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeOldSplits(splits, out, conf);
  out.close();
  writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}

Example #11

Source File: MRInputHelpers.java From tez with Apache License 2.0

5 votes

/**
 * Generate new-api mapreduce InputFormat splits
 * @param jobContext JobContext required by InputFormat
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * location hints for each split generated to be used to determining parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext,
                                                 Path inputSplitDir) throws IOException, InterruptedException,
    ClassNotFoundException {

  org.apache.hadoop.mapreduce.InputSplit[] splits =
      generateNewSplits(jobContext, false, true, 0);

  Configuration conf = jobContext.getConfiguration();

  JobSplitWriter.createSplitFiles(inputSplitDir, conf,
      inputSplitDir.getFileSystem(conf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        TaskLocationHint.createTaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null)
    );
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobContext.getCredentials());
}

Example #12

Source File: DistCp.java From big-c with Apache License 2.0

5 votes

/**
 * Create a default working folder for the job, under the
 * job staging directory
 *
 * @return Returns the working folder information
 * @throws Exception - EXception if any
 */
private Path createMetaFolderPath() throws Exception {
  Configuration configuration = getConf();
  Path stagingDir = JobSubmissionFiles.getStagingDir(
          new Cluster(configuration), configuration);
  Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
  if (LOG.isDebugEnabled())
    LOG.debug("Meta folder location: " + metaFolderPath);
  configuration.set(DistCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());    
  return metaFolderPath;
}

Example #13

Source File: JobSplitWriter.java From big-c with Apache License 2.0

5 votes

private static FSDataOutputStream createFile(FileSystem fs, Path splitFile, 
    Configuration job)  throws IOException {
  FSDataOutputStream out = FileSystem.create(fs, splitFile, 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));
  int replication = job.getInt(Job.SUBMIT_REPLICATION, 10);
  fs.setReplication(splitFile, (short)replication);
  writeSplitHeader(out);
  return out;
}

Example #14

Source File: S3MapReduceCp.java From circus-train with Apache License 2.0

5 votes

/**
 * Create a default working folder for the job, under the job staging directory
 *
 * @return Returns the working folder information
 * @throws Exception - EXception if any
 */
private Path createMetaFolderPath() throws Exception {
  Configuration configuration = getConf();
  Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(configuration), configuration);
  Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
  LOG.debug("Meta folder location: {}", metaFolderPath);
  configuration.set(S3MapReduceCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());
  return metaFolderPath;
}

Example #15

Source File: JobSplitWriter.java From big-c with Apache License 2.0

5 votes

public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, T[] splits) 
throws IOException, InterruptedException {
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeNewSplits(conf, splits, out);
  out.close();
  writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}

Example #16

Source File: SplitMetaInfoReader.java From big-c with Apache License 2.0

5 votes

public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(
    JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir) 
throws IOException {
  long maxMetaInfoSize = conf.getLong(MRJobConfig.SPLIT_METAINFO_MAXSIZE,
      MRJobConfig.DEFAULT_SPLIT_METAINFO_MAXSIZE);
  Path metaSplitFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
  String jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir).toString();
  FileStatus fStatus = fs.getFileStatus(metaSplitFile);
  if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
    throw new IOException("Split metadata size exceeded " +
        maxMetaInfoSize +". Aborting job " + jobId);
  }
  FSDataInputStream in = fs.open(metaSplitFile);
  byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
  in.readFully(header);
  if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
    throw new IOException("Invalid header on split file");
  }
  int vers = WritableUtils.readVInt(in);
  if (vers != JobSplit.META_SPLIT_VERSION) {
    in.close();
    throw new IOException("Unsupported split version " + vers);
  }
  int numSplits = WritableUtils.readVInt(in); //TODO: check for insane values
  JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo = 
    new JobSplit.TaskSplitMetaInfo[numSplits];
  for (int i = 0; i < numSplits; i++) {
    JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
    splitMetaInfo.readFields(in);
    JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
        jobSplitFile, 
        splitMetaInfo.getStartOffset());
    allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex, 
        splitMetaInfo.getLocations(), 
        splitMetaInfo.getInputDataLength());
  }
  in.close();
  return allSplitMetaInfo;
}

Example #17

Source File: DistCp.java From hadoop with Apache License 2.0

5 votes

/**
 * Create a default working folder for the job, under the
 * job staging directory
 *
 * @return Returns the working folder information
 * @throws Exception - EXception if any
 */
private Path createMetaFolderPath() throws Exception {
  Configuration configuration = getConf();
  Path stagingDir = JobSubmissionFiles.getStagingDir(
          new Cluster(configuration), configuration);
  Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
  if (LOG.isDebugEnabled())
    LOG.debug("Meta folder location: " + metaFolderPath);
  configuration.set(DistCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());    
  return metaFolderPath;
}

Example #18

Source File: JobSplitWriter.java From hadoop with Apache License 2.0

5 votes

private static FSDataOutputStream createFile(FileSystem fs, Path splitFile, 
    Configuration job)  throws IOException {
  FSDataOutputStream out = FileSystem.create(fs, splitFile, 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));
  int replication = job.getInt(Job.SUBMIT_REPLICATION, 10);
  fs.setReplication(splitFile, (short)replication);
  writeSplitHeader(out);
  return out;
}

Example #19

Source File: JobSplitWriter.java From hadoop with Apache License 2.0

5 votes

public static void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, 
    org.apache.hadoop.mapred.InputSplit[] splits) 
throws IOException {
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeOldSplits(splits, out, conf);
  out.close();
  writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}

Example #20

Source File: JobSplitWriter.java From hadoop with Apache License 2.0

5 votes

public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, T[] splits) 
throws IOException, InterruptedException {
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeNewSplits(conf, splits, out);
  out.close();
  writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}

Example #21

Source File: SplitMetaInfoReader.java From hadoop with Apache License 2.0

5 votes

public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(
    JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir) 
throws IOException {
  long maxMetaInfoSize = conf.getLong(MRJobConfig.SPLIT_METAINFO_MAXSIZE,
      MRJobConfig.DEFAULT_SPLIT_METAINFO_MAXSIZE);
  Path metaSplitFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
  String jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir).toString();
  FileStatus fStatus = fs.getFileStatus(metaSplitFile);
  if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
    throw new IOException("Split metadata size exceeded " +
        maxMetaInfoSize +". Aborting job " + jobId);
  }
  FSDataInputStream in = fs.open(metaSplitFile);
  byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
  in.readFully(header);
  if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
    throw new IOException("Invalid header on split file");
  }
  int vers = WritableUtils.readVInt(in);
  if (vers != JobSplit.META_SPLIT_VERSION) {
    in.close();
    throw new IOException("Unsupported split version " + vers);
  }
  int numSplits = WritableUtils.readVInt(in); //TODO: check for insane values
  JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo = 
    new JobSplit.TaskSplitMetaInfo[numSplits];
  for (int i = 0; i < numSplits; i++) {
    JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
    splitMetaInfo.readFields(in);
    JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
        jobSplitFile, 
        splitMetaInfo.getStartOffset());
    allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex, 
        splitMetaInfo.getLocations(), 
        splitMetaInfo.getInputDataLength());
  }
  in.close();
  return allSplitMetaInfo;
}

Example #22

Source File: JobUtil.java From hbase with Apache License 2.0

3 votes

/**
 * Initializes the staging directory and returns the qualified path.
 *
 * @param conf conf system configuration
 * @return qualified staging directory path
 * @throws IOException if the ownership on the staging directory is not as expected
 * @throws InterruptedException if the thread getting the staging directory is interrupted
 */
public static Path getQualifiedStagingDir(Configuration conf)
  throws IOException, InterruptedException {
  Cluster cluster = new Cluster(conf);
  Path stagingDir = JobSubmissionFiles.getStagingDir(cluster, conf);
  return cluster.getFileSystem().makeQualified(stagingDir);
}

Example #23

Source File: JobUtil.java From hbase with Apache License 2.0

2 votes

/**
 * Initializes the staging directory and returns the path.
 *
 * @param conf system configuration
 * @return staging directory path
 * @throws IOException if the ownership on the staging directory is not as expected
 * @throws InterruptedException if the thread getting the staging directory is interrupted
 */
public static Path getStagingDir(Configuration conf)
    throws IOException, InterruptedException {
  return JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
}