org.apache.hadoop.mapreduce.split.JobSplit.SplitMetaInfo Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.split.JobSplit.SplitMetaInfo. A SplitMetaInfo record describes one serialized input split: the hosts on which its data is local, its start offset within the job split file, and the length of input data it covers. All of the examples below come from Hadoop's JobSplitWriter, the class that writes the job split file and its companion split meta-info file during job submission. You can go to the original project or source file by following the links above each example.
Example #1
Source File: JobSplitWriter.java    From hadoop with Apache License 2.0
public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, T[] splits) 
throws IOException, InterruptedException {
  // Write the serialized splits to the job split file under jobSubmitDir.
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeNewSplits(conf, splits, out);
  out.close();
  // Write the companion meta-info file recording, for each split, its
  // locations, its offset in the split file, and its input data length.
  writeJobSplitMetaInfo(fs, JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}
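As a usage illustration, here is a minimal sketch of how this entry point might be driven during job submission. The staging path and the hand-built FileSplit are assumptions for the sketch; in a real submission the splits would come from InputFormat#getSplits(JobContext).

// Hypothetical usage sketch; jobSubmitDir and the FileSplit values are assumptions.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path jobSubmitDir = new Path("/tmp/staging/job_0001");

// Normally produced by InputFormat#getSplits(JobContext); hand-built here.
org.apache.hadoop.mapreduce.lib.input.FileSplit[] splits = {
    new org.apache.hadoop.mapreduce.lib.input.FileSplit(
        new Path("/data/part-00000"), 0L, 128L * 1024 * 1024,
        new String[] { "host1", "host2" })
};
JobSplitWriter.createSplitFiles(jobSubmitDir, conf, fs, splits);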
 
Example #2
Source File: JobSplitWriter.java    From hadoop with Apache License 2.0
public static void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, 
    org.apache.hadoop.mapred.InputSplit[] splits) 
throws IOException {
  // Same flow as the new-API overload, but old-API (mapred) splits are
  // Writable and serialize themselves via write(DataOutput).
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeOldSplits(splits, out, conf);
  out.close();
  writeJobSplitMetaInfo(fs, JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}
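A corresponding sketch for the old-API overload, reusing conf, fs, and jobSubmitDir from the previous sketch. The input path is hypothetical; the splits are obtained the way the mapred runtime would obtain them:

// Hypothetical usage sketch for old-API (mapred) splits; the input path is an assumption.
JobConf jobConf = new JobConf(conf);
org.apache.hadoop.mapred.FileInputFormat.addInputPath(jobConf, new Path("/data/input"));
org.apache.hadoop.mapred.InputSplit[] oldSplits =
    jobConf.getInputFormat().getSplits(jobConf, jobConf.getNumMapTasks());
JobSplitWriter.createSplitFiles(jobSubmitDir, jobConf, fs, oldSplits);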
 
Example #3
Source File: JobSplitWriter.java    From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
private static <T extends InputSplit> 
SplitMetaInfo[] writeNewSplits(Configuration conf, 
    T[] array, FSDataOutputStream out)
throws IOException, InterruptedException {

  SplitMetaInfo[] info = new SplitMetaInfo[array.length];
  if (array.length != 0) {
    SerializationFactory factory = new SerializationFactory(conf);
    int i = 0;
    int maxBlockLocations = conf.getInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY,
        MRConfig.MAX_BLOCK_LOCATIONS_DEFAULT);
    long offset = out.getPos();
    for(T split: array) {
      long prevCount = out.getPos();
      Text.writeString(out, split.getClass().getName());
      Serializer<T> serializer = 
        factory.getSerializer((Class<T>) split.getClass());
      serializer.open(out);
      serializer.serialize(split);
      long currCount = out.getPos();
      String[] locations = split.getLocations();
      if (locations.length > maxBlockLocations) {
        LOG.warn("Max block location exceeded for split: "
            + split + " splitsize: " + locations.length +
            " maxsize: " + maxBlockLocations);
        locations = Arrays.copyOf(locations, maxBlockLocations);
      }
      info[i++] = 
        new JobSplit.SplitMetaInfo( 
            locations, offset,
            split.getLength());
      offset += currCount - prevCount;
    }
  }
  return info;
}
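This path works for any split class the SerializationFactory can resolve a Serializer for; with a default io.serializations setting that effectively means the split implements Writable. A minimal sketch of such a split follows (RangeSplit is a hypothetical class, not part of Hadoop):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;

// Hypothetical split type; serializable by the default WritableSerialization.
public class RangeSplit extends InputSplit implements Writable {
  private long start;
  private long length;
  private String[] hosts = new String[0];

  public RangeSplit() { }  // no-arg constructor required for deserialization

  public RangeSplit(long start, long length, String[] hosts) {
    this.start = start;
    this.length = length;
    this.hosts = hosts;
  }

  @Override
  public long getLength() { return length; }       // logical bytes of input

  @Override
  public String[] getLocations() { return hosts; } // consulted for scheduling

  @Override
  public void write(DataOutput out) throws IOException {
    out.writeLong(start);
    out.writeLong(length);
    // Locations are deliberately not serialized; like FileSplit, they are
    // only needed at submission time.
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    start = in.readLong();
    length = in.readLong();
  }
}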
 
Example #4
Source File: JobSplitWriter.java    From hadoop with Apache License 2.0
private static SplitMetaInfo[] writeOldSplits(
    org.apache.hadoop.mapred.InputSplit[] splits,
    FSDataOutputStream out, Configuration conf) throws IOException {
  SplitMetaInfo[] info = new SplitMetaInfo[splits.length];
  if (splits.length != 0) {
    int i = 0;
    long offset = out.getPos();
    int maxBlockLocations = conf.getInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY,
        MRConfig.MAX_BLOCK_LOCATIONS_DEFAULT);
    for (org.apache.hadoop.mapred.InputSplit split : splits) {
      long prevLen = out.getPos();
      // Old-API splits are Writable, so they serialize themselves directly
      // after their class name; no SerializationFactory is needed.
      Text.writeString(out, split.getClass().getName());
      split.write(out);
      long currLen = out.getPos();
      String[] locations = split.getLocations();
      if (locations.length > maxBlockLocations) {
        LOG.warn("Max block location exceeded for split: "
            + split + " splitsize: " + locations.length +
            " maxsize: " + maxBlockLocations);
        locations = Arrays.copyOf(locations, maxBlockLocations);
      }
      info[i++] = new JobSplit.SplitMetaInfo( 
          locations, offset,
          split.getLength());
      offset += currLen - prevLen;
    }
  }
  return info;
}
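Since each record written above is just a class name followed by the split's own bytes, a consumer holding a SplitMetaInfo can seek to its offset and reconstruct the split, roughly as a map task does. A hedged sketch; metaInfo is a hypothetical variable holding one SplitMetaInfo, and fs, conf, and jobSubmitDir are assumed in scope:

// Hypothetical readback sketch; metaInfo, fs, conf, and jobSubmitDir are assumptions.
Path splitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir);
try (FSDataInputStream in = fs.open(splitFile)) {
  in.seek(metaInfo.getStartOffset());            // offset recorded in the meta-info
  String splitClassName = Text.readString(in);   // class name was written first
  Class<?> splitClass;
  try {
    splitClass = conf.getClassByName(splitClassName);
  } catch (ClassNotFoundException e) {
    throw new IOException("Split class not found: " + splitClassName, e);
  }
  // Old-API splits are Writable, so readFields() restores the split.
  org.apache.hadoop.mapred.InputSplit split =
      (org.apache.hadoop.mapred.InputSplit)
          ReflectionUtils.newInstance(splitClass, conf);
  split.readFields(in);
}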
 
Example #5
Source File: JobSplitWriter.java    From hadoop with Apache License 2.0
private static void writeJobSplitMetaInfo(FileSystem fs, Path filename, 
    FsPermission p, int splitMetaInfoVersion, 
    JobSplit.SplitMetaInfo[] allSplitMetaInfo) 
throws IOException {
  // Write the splits meta-info to a file for the job tracker:
  // a header, a version number, a split count, then one record per split.
  FSDataOutputStream out = 
    FileSystem.create(fs, filename, p);
  out.write(JobSplit.META_SPLIT_FILE_HEADER);
  WritableUtils.writeVInt(out, splitMetaInfoVersion);
  WritableUtils.writeVInt(out, allSplitMetaInfo.length);
  for (JobSplit.SplitMetaInfo splitMetaInfo : allSplitMetaInfo) {
    splitMetaInfo.write(out);  // locations, start offset, input data length
  }
  out.close();
}
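On the reading side, Hadoop's org.apache.hadoop.mapreduce.split.SplitMetaInfoReader parses exactly this layout. A brief hedged sketch of reading the file back; the job ID is hypothetical, and fs, conf, and jobSubmitDir are assumed in scope:

// Hypothetical readback via SplitMetaInfoReader; the job ID is an assumption.
JobID jobId = JobID.forName("job_1700000000000_0001");
JobSplit.TaskSplitMetaInfo[] taskMeta =
    SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf, jobSubmitDir);
for (JobSplit.TaskSplitMetaInfo m : taskMeta) {
  System.out.println("offset=" + m.getStartOffset()
      + " length=" + m.getInputDataLength()
      + " locations=" + Arrays.toString(m.getLocations()));
}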
 