org.apache.hadoop.mapreduce.split.JobSplitWriter Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.split.JobSplitWriter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: JobSubmitter.java From hadoop with Apache License 2.0

6 votes

@SuppressWarnings("unchecked")
private <T extends InputSplit>
int writeNewSplits(JobContext job, Path jobSubmitDir) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = job.getConfiguration();
  InputFormat<?, ?> input =
    ReflectionUtils.newInstance(job.getInputFormatClass(), conf);

  List<InputSplit> splits = input.getSplits(job);
  T[] array = (T[]) splits.toArray(new InputSplit[splits.size()]);

  // sort the splits into order based on size, so that the biggest
  // go first
  Arrays.sort(array, new SplitComparator());
  JobSplitWriter.createSplitFiles(jobSubmitDir, conf, 
      jobSubmitDir.getFileSystem(conf), array);
  return array.length;
}

Example #2

Source File: JobSubmitter.java From big-c with Apache License 2.0

6 votes

@SuppressWarnings("unchecked")
private <T extends InputSplit>
int writeNewSplits(JobContext job, Path jobSubmitDir) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = job.getConfiguration();
  InputFormat<?, ?> input =
    ReflectionUtils.newInstance(job.getInputFormatClass(), conf);

  List<InputSplit> splits = input.getSplits(job);
  T[] array = (T[]) splits.toArray(new InputSplit[splits.size()]);

  // sort the splits into order based on size, so that the biggest
  // go first
  Arrays.sort(array, new SplitComparator());
  JobSplitWriter.createSplitFiles(jobSubmitDir, conf, 
      jobSubmitDir.getFileSystem(conf), array);
  return array.length;
}

Example #3

Source File: MRHelpers.java From incubator-tez with Apache License 2.0

6 votes

/**
 * Generate new-api mapreduce InputFormat splits
 * @param jobContext JobContext required by InputFormat
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * location hints for each split generated to be used to determining parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext,
    Path inputSplitDir) throws IOException, InterruptedException,
    ClassNotFoundException {
  
  org.apache.hadoop.mapreduce.InputSplit[] splits = 
      generateNewSplits(jobContext, null, 0);
  
  Configuration conf = jobContext.getConfiguration();

  JobSplitWriter.createSplitFiles(inputSplitDir, conf,
      inputSplitDir.getFileSystem(conf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        new TaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null));
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobContext.getCredentials());
}

Example #4

Source File: MRHelpers.java From incubator-tez with Apache License 2.0

6 votes

/**
 * Generate old-api mapred InputFormat splits
 * @param jobConf JobConf required by InputFormat class
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated to be used to determining parallelism of
 * the map stage.
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf,
    Path inputSplitDir) throws IOException {
  
  org.apache.hadoop.mapred.InputSplit[] splits = 
      generateOldSplits(jobConf, null, 0);
  
  JobSplitWriter.createSplitFiles(inputSplitDir, jobConf,
      inputSplitDir.getFileSystem(jobConf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        new TaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null));
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobConf.getCredentials());
}

Example #5

Source File: MRInputHelpers.java From tez with Apache License 2.0

6 votes

/**
 * Generate old-api mapred InputFormat splits
 * @param jobConf JobConf required by InputFormat class
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated to be used to determining parallelism of
 * the map stage.
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf,
                                                 Path inputSplitDir) throws IOException {

  org.apache.hadoop.mapred.InputSplit[] splits =
      generateOldSplits(jobConf, false, true, 0);

  JobSplitWriter.createSplitFiles(inputSplitDir, jobConf,
      inputSplitDir.getFileSystem(jobConf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        TaskLocationHint.createTaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null)
    );
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobConf.getCredentials());
}

Example #6

Source File: JobSubmitter.java From hadoop with Apache License 2.0

5 votes

private int writeOldSplits(JobConf job, Path jobSubmitDir) 
throws IOException {
  org.apache.hadoop.mapred.InputSplit[] splits =
  job.getInputFormat().getSplits(job, job.getNumMapTasks());
  // sort the splits into order based on size, so that the biggest
  // go first
  Arrays.sort(splits, new Comparator<org.apache.hadoop.mapred.InputSplit>() {
    public int compare(org.apache.hadoop.mapred.InputSplit a,
                       org.apache.hadoop.mapred.InputSplit b) {
      try {
        long left = a.getLength();
        long right = b.getLength();
        if (left == right) {
          return 0;
        } else if (left < right) {
          return 1;
        } else {
          return -1;
        }
      } catch (IOException ie) {
        throw new RuntimeException("Problem getting input split size", ie);
      }
    }
  });
  JobSplitWriter.createSplitFiles(jobSubmitDir, job, 
      jobSubmitDir.getFileSystem(job), splits);
  return splits.length;
}

Example #7

Source File: JobSubmitter.java From big-c with Apache License 2.0

5 votes

private int writeOldSplits(JobConf job, Path jobSubmitDir) 
throws IOException {
  org.apache.hadoop.mapred.InputSplit[] splits =
  job.getInputFormat().getSplits(job, job.getNumMapTasks());
  // sort the splits into order based on size, so that the biggest
  // go first
  Arrays.sort(splits, new Comparator<org.apache.hadoop.mapred.InputSplit>() {
    public int compare(org.apache.hadoop.mapred.InputSplit a,
                       org.apache.hadoop.mapred.InputSplit b) {
      try {
        long left = a.getLength();
        long right = b.getLength();
        if (left == right) {
          return 0;
        } else if (left < right) {
          return 1;
        } else {
          return -1;
        }
      } catch (IOException ie) {
        throw new RuntimeException("Problem getting input split size", ie);
      }
    }
  });
  JobSplitWriter.createSplitFiles(jobSubmitDir, job, 
      jobSubmitDir.getFileSystem(job), splits);
  return splits.length;
}

Example #8

Source File: MRInputHelpers.java From tez with Apache License 2.0

5 votes

/**
 * Generate new-api mapreduce InputFormat splits
 * @param jobContext JobContext required by InputFormat
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * location hints for each split generated to be used to determining parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext,
                                                 Path inputSplitDir) throws IOException, InterruptedException,
    ClassNotFoundException {

  org.apache.hadoop.mapreduce.InputSplit[] splits =
      generateNewSplits(jobContext, false, true, 0);

  Configuration conf = jobContext.getConfiguration();

  JobSplitWriter.createSplitFiles(inputSplitDir, conf,
      inputSplitDir.getFileSystem(conf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        TaskLocationHint.createTaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null)
    );
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobContext.getCredentials());
}