Java Code Examples for org.apache.hadoop.mapreduce.JobSubmissionFiles

The following examples show how to use org.apache.hadoop.mapreduce.JobSubmissionFiles. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hadoop   Source File: TestExternalCall.java    License: Apache License 2.0 6 votes vote down vote up
/**
* test methods run end execute of DistCp class. silple copy file
* @throws Exception 
*/
 @Test
 public void testCleanup() throws Exception {

     Configuration conf = getConf();

     Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf),
         conf);
     stagingDir.getFileSystem(conf).mkdirs(stagingDir);
     Path soure = createFile("tmp.txt");
     Path target = createFile("target.txt");

     DistCp distcp = new DistCp(conf, null);
     String[] arg = { soure.toString(), target.toString() };

     distcp.run(arg);
     Assert.assertTrue(fs.exists(target));

 
 }
 
Example 2
Source Project: hadoop   Source File: TestExternalCall.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * test main method of DistCp. Method should to call System.exit().
 * 
 */
@Test
public void testCleanupTestViaToolRunner() throws IOException, InterruptedException {

  Configuration conf = getConf();

  Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
  stagingDir.getFileSystem(conf).mkdirs(stagingDir);
 
  Path soure = createFile("tmp.txt");
  Path target = createFile("target.txt");
  try {

    String[] arg = {target.toString(),soure.toString()};
    DistCp.main(arg);
    Assert.fail();

  } catch (ExitException t) {
    Assert.assertTrue(fs.exists(target));
    Assert.assertEquals(t.status, 0);
    Assert.assertEquals(
        stagingDir.getFileSystem(conf).listStatus(stagingDir).length, 0);
  }

}
 
Example 3
Source Project: hadoop   Source File: TestIntegration.java    License: Apache License 2.0 6 votes vote down vote up
@Test(timeout=100000)
public void testCleanup() {
  try {
    Path sourcePath = new Path("noscheme:///file");
    List<Path> sources = new ArrayList<Path>();
    sources.add(sourcePath);

    DistCpOptions options = new DistCpOptions(sources, target);

    Configuration conf = getConf();
    Path stagingDir = JobSubmissionFiles.getStagingDir(
            new Cluster(conf), conf);
    stagingDir.getFileSystem(conf).mkdirs(stagingDir);

    try {
      new DistCp(conf, options).execute();
    } catch (Throwable t) {
      Assert.assertEquals(stagingDir.getFileSystem(conf).
          listStatus(stagingDir).length, 0);
    }
  } catch (Exception e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("testCleanup failed " + e.getMessage());
  }
}
 
Example 4
Source Project: big-c   Source File: TestExternalCall.java    License: Apache License 2.0 6 votes vote down vote up
/**
* test methods run end execute of DistCp class. silple copy file
* @throws Exception 
*/
 @Test
 public void testCleanup() throws Exception {

     Configuration conf = getConf();

     Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf),
         conf);
     stagingDir.getFileSystem(conf).mkdirs(stagingDir);
     Path soure = createFile("tmp.txt");
     Path target = createFile("target.txt");

     DistCp distcp = new DistCp(conf, null);
     String[] arg = { soure.toString(), target.toString() };

     distcp.run(arg);
     Assert.assertTrue(fs.exists(target));

 
 }
 
Example 5
Source Project: big-c   Source File: TestExternalCall.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * test main method of DistCp. Method should to call System.exit().
 * 
 */
@Test
public void testCleanupTestViaToolRunner() throws IOException, InterruptedException {

  Configuration conf = getConf();

  Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
  stagingDir.getFileSystem(conf).mkdirs(stagingDir);
 
  Path soure = createFile("tmp.txt");
  Path target = createFile("target.txt");
  try {

    String[] arg = {target.toString(),soure.toString()};
    DistCp.main(arg);
    Assert.fail();

  } catch (ExitException t) {
    Assert.assertTrue(fs.exists(target));
    Assert.assertEquals(t.status, 0);
    Assert.assertEquals(
        stagingDir.getFileSystem(conf).listStatus(stagingDir).length, 0);
  }

}
 
Example 6
Source Project: big-c   Source File: TestIntegration.java    License: Apache License 2.0 6 votes vote down vote up
@Test(timeout=100000)
public void testCleanup() {
  try {
    Path sourcePath = new Path("noscheme:///file");
    List<Path> sources = new ArrayList<Path>();
    sources.add(sourcePath);

    DistCpOptions options = new DistCpOptions(sources, target);

    Configuration conf = getConf();
    Path stagingDir = JobSubmissionFiles.getStagingDir(
            new Cluster(conf), conf);
    stagingDir.getFileSystem(conf).mkdirs(stagingDir);

    try {
      new DistCp(conf, options).execute();
    } catch (Throwable t) {
      Assert.assertEquals(stagingDir.getFileSystem(conf).
          listStatus(stagingDir).length, 0);
    }
  } catch (Exception e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("testCleanup failed " + e.getMessage());
  }
}
 
Example 7
Source Project: incubator-tez   Source File: MRHelpers.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Generate new-api mapreduce InputFormat splits
 * @param jobContext JobContext required by InputFormat
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * location hints for each split generated to be used to determining parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext,
    Path inputSplitDir) throws IOException, InterruptedException,
    ClassNotFoundException {
  
  org.apache.hadoop.mapreduce.InputSplit[] splits = 
      generateNewSplits(jobContext, null, 0);
  
  Configuration conf = jobContext.getConfiguration();

  JobSplitWriter.createSplitFiles(inputSplitDir, conf,
      inputSplitDir.getFileSystem(conf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        new TaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null));
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobContext.getCredentials());
}
 
Example 8
Source Project: incubator-tez   Source File: MRHelpers.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Generate old-api mapred InputFormat splits
 * @param jobConf JobConf required by InputFormat class
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated to be used to determining parallelism of
 * the map stage.
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf,
    Path inputSplitDir) throws IOException {
  
  org.apache.hadoop.mapred.InputSplit[] splits = 
      generateOldSplits(jobConf, null, 0);
  
  JobSplitWriter.createSplitFiles(inputSplitDir, jobConf,
      inputSplitDir.getFileSystem(jobConf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        new TaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null));
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobConf.getCredentials());
}
 
Example 9
Source Project: tez   Source File: MRInputHelpers.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Generate old-api mapred InputFormat splits
 * @param jobConf JobConf required by InputFormat class
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated to be used to determining parallelism of
 * the map stage.
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf,
                                                 Path inputSplitDir) throws IOException {

  org.apache.hadoop.mapred.InputSplit[] splits =
      generateOldSplits(jobConf, false, true, 0);

  JobSplitWriter.createSplitFiles(inputSplitDir, jobConf,
      inputSplitDir.getFileSystem(jobConf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        TaskLocationHint.createTaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null)
    );
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobConf.getCredentials());
}
 
Example 10
Source Project: circus-train   Source File: S3MapReduceCp.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Create a default working folder for the job, under the job staging directory
 *
 * @return Returns the working folder information
 * @throws Exception - EXception if any
 */
private Path createMetaFolderPath() throws Exception {
  Configuration configuration = getConf();
  Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(configuration), configuration);
  Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
  LOG.debug("Meta folder location: {}", metaFolderPath);
  configuration.set(S3MapReduceCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());
  return metaFolderPath;
}
 
Example 11
Source Project: hadoop   Source File: SplitMetaInfoReader.java    License: Apache License 2.0 5 votes vote down vote up
public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(
    JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir) 
throws IOException {
  long maxMetaInfoSize = conf.getLong(MRJobConfig.SPLIT_METAINFO_MAXSIZE,
      MRJobConfig.DEFAULT_SPLIT_METAINFO_MAXSIZE);
  Path metaSplitFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
  String jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir).toString();
  FileStatus fStatus = fs.getFileStatus(metaSplitFile);
  if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
    throw new IOException("Split metadata size exceeded " +
        maxMetaInfoSize +". Aborting job " + jobId);
  }
  FSDataInputStream in = fs.open(metaSplitFile);
  byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
  in.readFully(header);
  if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
    throw new IOException("Invalid header on split file");
  }
  int vers = WritableUtils.readVInt(in);
  if (vers != JobSplit.META_SPLIT_VERSION) {
    in.close();
    throw new IOException("Unsupported split version " + vers);
  }
  int numSplits = WritableUtils.readVInt(in); //TODO: check for insane values
  JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo = 
    new JobSplit.TaskSplitMetaInfo[numSplits];
  for (int i = 0; i < numSplits; i++) {
    JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
    splitMetaInfo.readFields(in);
    JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
        jobSplitFile, 
        splitMetaInfo.getStartOffset());
    allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex, 
        splitMetaInfo.getLocations(), 
        splitMetaInfo.getInputDataLength());
  }
  in.close();
  return allSplitMetaInfo;
}
 
Example 12
Source Project: hadoop   Source File: JobSplitWriter.java    License: Apache License 2.0 5 votes vote down vote up
public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, T[] splits) 
throws IOException, InterruptedException {
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeNewSplits(conf, splits, out);
  out.close();
  writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}
 
Example 13
Source Project: hadoop   Source File: JobSplitWriter.java    License: Apache License 2.0 5 votes vote down vote up
public static void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, 
    org.apache.hadoop.mapred.InputSplit[] splits) 
throws IOException {
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeOldSplits(splits, out, conf);
  out.close();
  writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}
 
Example 14
Source Project: hadoop   Source File: JobSplitWriter.java    License: Apache License 2.0 5 votes vote down vote up
private static FSDataOutputStream createFile(FileSystem fs, Path splitFile, 
    Configuration job)  throws IOException {
  FSDataOutputStream out = FileSystem.create(fs, splitFile, 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));
  int replication = job.getInt(Job.SUBMIT_REPLICATION, 10);
  fs.setReplication(splitFile, (short)replication);
  writeSplitHeader(out);
  return out;
}
 
Example 15
Source Project: hadoop   Source File: DistCp.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Create a default working folder for the job, under the
 * job staging directory
 *
 * @return Returns the working folder information
 * @throws Exception - EXception if any
 */
private Path createMetaFolderPath() throws Exception {
  Configuration configuration = getConf();
  Path stagingDir = JobSubmissionFiles.getStagingDir(
          new Cluster(configuration), configuration);
  Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
  if (LOG.isDebugEnabled())
    LOG.debug("Meta folder location: " + metaFolderPath);
  configuration.set(DistCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());    
  return metaFolderPath;
}
 
Example 16
Source Project: big-c   Source File: SplitMetaInfoReader.java    License: Apache License 2.0 5 votes vote down vote up
public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(
    JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir) 
throws IOException {
  long maxMetaInfoSize = conf.getLong(MRJobConfig.SPLIT_METAINFO_MAXSIZE,
      MRJobConfig.DEFAULT_SPLIT_METAINFO_MAXSIZE);
  Path metaSplitFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
  String jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir).toString();
  FileStatus fStatus = fs.getFileStatus(metaSplitFile);
  if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
    throw new IOException("Split metadata size exceeded " +
        maxMetaInfoSize +". Aborting job " + jobId);
  }
  FSDataInputStream in = fs.open(metaSplitFile);
  byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
  in.readFully(header);
  if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
    throw new IOException("Invalid header on split file");
  }
  int vers = WritableUtils.readVInt(in);
  if (vers != JobSplit.META_SPLIT_VERSION) {
    in.close();
    throw new IOException("Unsupported split version " + vers);
  }
  int numSplits = WritableUtils.readVInt(in); //TODO: check for insane values
  JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo = 
    new JobSplit.TaskSplitMetaInfo[numSplits];
  for (int i = 0; i < numSplits; i++) {
    JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
    splitMetaInfo.readFields(in);
    JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
        jobSplitFile, 
        splitMetaInfo.getStartOffset());
    allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex, 
        splitMetaInfo.getLocations(), 
        splitMetaInfo.getInputDataLength());
  }
  in.close();
  return allSplitMetaInfo;
}
 
Example 17
Source Project: big-c   Source File: JobSplitWriter.java    License: Apache License 2.0 5 votes vote down vote up
public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, T[] splits) 
throws IOException, InterruptedException {
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeNewSplits(conf, splits, out);
  out.close();
  writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}
 
Example 18
Source Project: big-c   Source File: JobSplitWriter.java    License: Apache License 2.0 5 votes vote down vote up
public static void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, 
    org.apache.hadoop.mapred.InputSplit[] splits) 
throws IOException {
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeOldSplits(splits, out, conf);
  out.close();
  writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}
 
Example 19
Source Project: big-c   Source File: JobSplitWriter.java    License: Apache License 2.0 5 votes vote down vote up
private static FSDataOutputStream createFile(FileSystem fs, Path splitFile, 
    Configuration job)  throws IOException {
  FSDataOutputStream out = FileSystem.create(fs, splitFile, 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));
  int replication = job.getInt(Job.SUBMIT_REPLICATION, 10);
  fs.setReplication(splitFile, (short)replication);
  writeSplitHeader(out);
  return out;
}
 
Example 20
Source Project: big-c   Source File: DistCp.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Create a default working folder for the job, under the
 * job staging directory
 *
 * @return Returns the working folder information
 * @throws Exception - EXception if any
 */
private Path createMetaFolderPath() throws Exception {
  Configuration configuration = getConf();
  Path stagingDir = JobSubmissionFiles.getStagingDir(
          new Cluster(configuration), configuration);
  Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
  if (LOG.isDebugEnabled())
    LOG.debug("Meta folder location: " + metaFolderPath);
  configuration.set(DistCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());    
  return metaFolderPath;
}
 
Example 21
Source Project: tez   Source File: MRInputHelpers.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Generate new-api mapreduce InputFormat splits
 * @param jobContext JobContext required by InputFormat
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * location hints for each split generated to be used to determining parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext,
                                                 Path inputSplitDir) throws IOException, InterruptedException,
    ClassNotFoundException {

  org.apache.hadoop.mapreduce.InputSplit[] splits =
      generateNewSplits(jobContext, false, true, 0);

  Configuration conf = jobContext.getConfiguration();

  JobSplitWriter.createSplitFiles(inputSplitDir, conf,
      inputSplitDir.getFileSystem(conf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        TaskLocationHint.createTaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null)
    );
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobContext.getCredentials());
}
 
Example 22
Source Project: hbase   Source File: JobUtil.java    License: Apache License 2.0 3 votes vote down vote up
/**
 * Initializes the staging directory and returns the qualified path.
 *
 * @param conf conf system configuration
 * @return qualified staging directory path
 * @throws IOException if the ownership on the staging directory is not as expected
 * @throws InterruptedException if the thread getting the staging directory is interrupted
 */
public static Path getQualifiedStagingDir(Configuration conf)
  throws IOException, InterruptedException {
  Cluster cluster = new Cluster(conf);
  Path stagingDir = JobSubmissionFiles.getStagingDir(cluster, conf);
  return cluster.getFileSystem().makeQualified(stagingDir);
}
 
Example 23
Source Project: hbase   Source File: JobUtil.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Initializes the staging directory and returns the path.
 *
 * @param conf system configuration
 * @return staging directory path
 * @throws IOException if the ownership on the staging directory is not as expected
 * @throws InterruptedException if the thread getting the staging directory is interrupted
 */
public static Path getStagingDir(Configuration conf)
    throws IOException, InterruptedException {
  return JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
}