Java Code Examples for org.apache.hadoop.mapred.InputSplit#getLength()

The following examples show how to use org.apache.hadoop.mapred.InputSplit#getLength(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HdfsDataFragmenter.java    From pxf with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the input splits for the given path, dropping empty ones.
 *
 * @param path the data path to split
 * @return all splits with a positive length (never null)
 * @throws IOException if the underlying input format fails to split the path
 */
protected List<InputSplit> getSplits(Path path) throws IOException {
    PxfInputFormat inputFormat = new PxfInputFormat();
    PxfInputFormat.setInputPaths(jobConf, path);
    InputSplit[] rawSplits = inputFormat.getSplits(jobConf, 1);
    List<InputSplit> nonEmptySplits = new ArrayList<>();

    /*
     * HD-2547: an empty file produces a split with no locations and zero
     * length; filter such splits out so callers only see real data.
     */
    if (rawSplits != null) {
        for (InputSplit candidate : rawSplits) {
            if (candidate.getLength() > 0) {
                nonEmptySplits.add(candidate);
            }
        }
    }

    return nonEmptySplits;
}
 
Example 2
Source File: HdfsDataFragmenter.java    From pxf with Apache License 2.0 5 votes vote down vote up
@Override
public FragmentStats getFragmentStats() throws Exception {
    String dataUri = hcfsType.getDataUri(jobConf, context);
    List<InputSplit> fragments = getSplits(new Path(dataUri));

    // No data at all: report zeroed stats rather than failing.
    if (fragments.isEmpty()) {
        return new FragmentStats(0, 0, 0);
    }

    // Aggregate the byte size of every split; the first split's size is
    // reported separately as a representative fragment size.
    long aggregateSize = 0;
    for (InputSplit fragment : fragments) {
        aggregateSize += fragment.getLength();
    }
    long firstFragmentSize = fragments.get(0).getLength();
    return new FragmentStats(fragments.size(), firstFragmentSize, aggregateSize);
}
 
Example 3
Source File: CompositeInputSplit.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Appends an InputSplit to this collection and accumulates its length
 * into the running total size.
 *
 * @param s the split to add
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
  if (splits == null) {
    throw new IOException("Uninitialized InputSplit");
  }
  if (fill == splits.length) {
    throw new IOException("Too many splits");
  }
  // Store first, then accumulate: getLength() may itself throw IOException.
  splits[fill] = s;
  fill++;
  totsize += s.getLength();
}
 
Example 4
Source File: CompositeInputSplit.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Appends an InputSplit to this collection, tracking the combined size.
 *
 * @param s the split to append
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
  if (splits == null) {
    throw new IOException("Uninitialized InputSplit");
  }
  if (fill == splits.length) {
    throw new IOException("Too many splits");
  }
  // Insert before reading the length, mirroring the original ordering
  // (getLength() can throw IOException).
  splits[fill] = s;
  fill++;
  totsize += s.getLength();
}
 
Example 5
Source File: DeprecatedParquetInputFormat.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps a new-API {@code ParquetRecordReader} behind the old mapred API.
 * Records the split length, initializes the real reader for either a
 * {@code ParquetInputSplitWrapper} or a plain {@code FileSplit}, and
 * pre-reads the first record so key/value objects are available.
 *
 * @param oldSplit   the old-API split (must be a FileSplit or ParquetInputSplitWrapper)
 * @param oldJobConf job configuration used to build the read support and filter
 * @param reporter   progress reporter passed through to the real reader
 * @throws IOException              if the reader fails to initialize or read
 * @throws IllegalArgumentException if the split is of an unsupported type
 */
public RecordReaderWrapper(
    InputSplit oldSplit, JobConf oldJobConf, Reporter reporter)
    throws IOException {
  splitLen = oldSplit.getLength();

  try {
    realReader = new ParquetRecordReader<V>(
        ParquetInputFormat.<V>getReadSupportInstance(oldJobConf),
        ParquetInputFormat.getFilter(oldJobConf));

    if (oldSplit instanceof ParquetInputSplitWrapper) {
      realReader.initialize(((ParquetInputSplitWrapper) oldSplit).realSplit, oldJobConf, reporter);
    } else if (oldSplit instanceof FileSplit) {
      realReader.initialize((FileSplit) oldSplit, oldJobConf, reporter);
    } else {
      throw new IllegalArgumentException(
          "Invalid split (not a FileSplit or ParquetInputSplitWrapper): " + oldSplit);
    }

    // read once to gain access to key and value objects
    if (realReader.nextKeyValue()) {
      firstRecord = true;
      valueContainer = new Container<V>();
      valueContainer.set(realReader.getCurrentValue());

    } else {
      eof = true;
    }
  } catch (InterruptedException e) {
    // Restore the interrupt flag for callers. The previous
    // Thread.interrupted() call CLEARED the flag, hiding the
    // interruption from code further up the stack.
    Thread.currentThread().interrupt();
    throw new IOException(e);
  }
}
 
Example 6
Source File: ParquetAsTextInputFormat.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Adapts a new-API {@code ParquetRecordReader} over SimpleGroup records to
 * the old mapred streaming API. Initializes the real reader from the wrapped
 * split, publishes the input file path into the job configuration, and
 * pre-reads the first record so key/value objects are available.
 *
 * @param newInputFormat unused here; kept for interface compatibility
 * @param oldSplit       must be a StreamingParquetInputSplitWrapper
 * @param oldJobConf     job configuration; receives the map.input.file keys
 * @param reporter       progress reporter passed through to the real reader
 * @throws IOException if the reader fails to initialize or read
 */
public TextRecordReaderWrapper(ParquetInputFormat<SimpleGroup> newInputFormat,
                           InputSplit oldSplit,
                           JobConf oldJobConf,
                           Reporter reporter) throws IOException {

    splitLen = oldSplit.getLength();

    try {
        ReadSupport<SimpleGroup> rs = ParquetInputFormat.getReadSupportInstance(oldJobConf);
        realReader = new ParquetRecordReader<>(rs);
        realReader.initialize(((StreamingParquetInputSplitWrapper)oldSplit).realSplit, oldJobConf, reporter);

        // Compute the input path once instead of unwrapping the split twice.
        String inputFile = ((StreamingParquetInputSplitWrapper)oldSplit).realSplit.getPath().toString();
        oldJobConf.set("map.input.file", inputFile);
        oldJobConf.set("mapreduce.map.input.file", inputFile);

        // read once to gain access to key and value objects
        if (realReader.nextKeyValue()) {

          firstRecord = true;
          valueContainer = new Container<>();
          SimpleGroup v = realReader.getCurrentValue();
          valueContainer.set(v);
          ls = groupToStrings(v);
        } else {

          eof = true;
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag for callers; Thread.interrupted()
        // would clear it and hide the interruption from the caller.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }
}
 
Example 7
Source File: CompositeInputSplit.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Adds one InputSplit to this composite and grows the total size by its
 * length.
 *
 * @param s the split to add
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
  if (splits == null) {
    throw new IOException("Uninitialized InputSplit");
  }
  if (fill == splits.length) {
    throw new IOException("Too many splits");
  }
  // Keep the original ordering: store the split, then read its length
  // (getLength() may throw IOException).
  splits[fill] = s;
  fill++;
  totsize += s.getLength();
}
 
Example 8
Source File: TezGroupedSplit.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a split to this group and extends the group's length by the
 * split's length. Any failure reading the length is rethrown unchecked.
 */
public void addSplit(InputSplit split) {
  wrappedSplits.add(split);
  long splitLength;
  try {
    splitLength = split.getLength();
  } catch (Exception cause) {
    throw new TezUncheckedException(cause);
  }
  length += splitLength;
}
 
Example 9
Source File: SplittableXmlInputFormat.java    From Hive-XML-SerDe with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a record reader over the region of XML covered by the given split.
 *
 * @param inputSplit the split to read; expected to be a FileSplit
 * @param job        the job configuration
 * @param reporter   progress reporter (unused here)
 * @return a HiveXmlRecordReader positioned on [start, start + length)
 * @throws IOException if the input stream for the split cannot be opened
 */
@Override
public RecordReader<LongWritable, Text> getRecordReader(InputSplit inputSplit, JobConf job, Reporter reporter) throws IOException {

    FileSplit fileSplit = (FileSplit) inputSplit;
    InputStream inputStream;
    try {
        inputStream = getInputStream(job, fileSplit);
    } catch (ClassNotFoundException e) {
        // Previously this was only printed via printStackTrace() and a null
        // stream was handed to the reader. Fail fast with the cause instead.
        throw new IOException("Unable to open input stream for split: " + inputSplit, e);
    }
    long start = fileSplit.getStart();
    long end = start + inputSplit.getLength();

    return new HiveXmlRecordReader(job, inputStream, start, end);
}
 
Example 10
Source File: TezGroupedSplit.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Appends a split to this group, accumulating its length into the group
 * total. A failure to read the length surfaces as an unchecked exception.
 */
public void addSplit(InputSplit split) {
  wrappedSplits.add(split);
  long splitLength;
  try {
    splitLength = split.getLength();
  } catch (Exception cause) {
    throw new TezUncheckedException(cause);
  }
  length += splitLength;
}
 
Example 11
Source File: CompositeInputSplit.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
 * Stores an InputSplit in this collection and adds its length to the
 * accumulated total.
 *
 * @param s the split to store
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
  if (splits == null) {
    throw new IOException("Uninitialized InputSplit");
  }
  if (fill == splits.length) {
    throw new IOException("Too many splits");
  }
  // Store before accumulating, as in the original (getLength() can throw).
  splits[fill] = s;
  fill++;
  totsize += s.getLength();
}