Java Code Examples for org.apache.hadoop.mapreduce.lib.input.CombineFileSplit

The following examples show how to use org.apache.hadoop.mapreduce.lib.input.CombineFileSplit. The examples are extracted from open source projects; the source project, source file, and license are listed above each example.
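Before the examples, here is a minimal, self-contained sketch of the core CombineFileSplit API that they all rely on: the array-based constructor, the per-file accessors getPath(int), getOffset(int), and getLength(int), and getLength() for the total byte count across all files. The paths, offsets, and lengths below are hypothetical values chosen for illustration.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class CombineFileSplitSketch {
  public static void main(String[] args) {
    // Hypothetical files, per-file start offsets, and per-file lengths.
    Path[] files = { new Path("/data/a.txt"), new Path("/data/b.txt") };
    long[] starts = { 0L, 0L };
    long[] lengths = { 1024L, 2048L };
    String[] hosts = { "host1" }; // preferred locations; may be empty

    CombineFileSplit split = new CombineFileSplit(files, starts, lengths, hosts);

    System.out.println(split.getNumPaths()); // 2
    System.out.println(split.getLength());   // 3072, the total across all files
    for (int i = 0; i < split.getNumPaths(); i++) {
      System.out.println(split.getPath(i) + " @" + split.getOffset(i)
          + " +" + split.getLength(i));
    }
  }
}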
Example 1
Source Project: hadoop   Source File: MultiFileWordCount.java    License: Apache License 2.0
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;
  
  //open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {  // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
                (int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
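A reader with the (CombineFileSplit, TaskAttemptContext, Integer) constructor above is not instantiated directly: CombineFileRecordReader creates one instance per path in the split via reflection, passing the path index. The following sketch shows that wiring, mirroring the MyInputFormat class in MultiFileWordCount; it assumes the hadoop-mapreduce-examples artifact is on the classpath so CombineFileLineRecordReader and WordOffset resolve.

import java.io.IOException;
import org.apache.hadoop.examples.MultiFileWordCount.CombineFileLineRecordReader;
import org.apache.hadoop.examples.MultiFileWordCount.WordOffset;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class CombineWiringSketch extends CombineFileInputFormat<WordOffset, Text> {
  @Override
  public RecordReader<WordOffset, Text> createRecordReader(
      InputSplit split, TaskAttemptContext context) throws IOException {
    // CombineFileRecordReader reflectively invokes the
    // (CombineFileSplit, TaskAttemptContext, Integer) constructor once
    // per path in the split and chains the resulting readers together.
    return new CombineFileRecordReader<>((CombineFileSplit) split, context,
        CombineFileLineRecordReader.class);
  }
}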
 
Example 2
Source Project: hadoop   Source File: GridmixSplit.java    License: Apache License 2.0
public GridmixSplit(CombineFileSplit cfsplit, int maps, int id,
    long inputBytes, long inputRecords, long outputBytes,
    long outputRecords, double[] reduceBytes, double[] reduceRecords,
    long[] reduceOutputBytes, long[] reduceOutputRecords)
    throws IOException {
  super(cfsplit);
  this.id = id;
  this.maps = maps;
  reduces = reduceBytes.length;
  this.inputRecords = inputRecords;
  this.outputBytes = outputBytes;
  this.outputRecords = outputRecords;
  this.reduceBytes = reduceBytes;
  this.reduceRecords = reduceRecords;
  nSpec = reduceOutputBytes.length;
  this.reduceOutputBytes = reduceOutputBytes;
  this.reduceOutputRecords = reduceOutputRecords;
}
 
Example 3
Source Project: hadoop   Source File: LoadSplit.java    License: Apache License 2.0
public LoadSplit(CombineFileSplit cfsplit, int maps, int id, long inputBytes, 
                 long inputRecords, long outputBytes, long outputRecords, 
                 double[] reduceBytes, double[] reduceRecords, 
                 long[] reduceOutputBytes, long[] reduceOutputRecords,
                 ResourceUsageMetrics metrics,
                 ResourceUsageMetrics[] rMetrics)
throws IOException {
  super(cfsplit);
  this.id = id;
  this.maps = maps;
  reduces = reduceBytes.length;
  this.inputRecords = inputRecords;
  this.outputBytes = outputBytes;
  this.outputRecords = outputRecords;
  this.reduceBytes = reduceBytes;
  this.reduceRecords = reduceRecords;
  nSpec = reduceOutputBytes.length;
  this.reduceOutputBytes = reduceOutputBytes;
  this.reduceOutputRecords = reduceOutputRecords;
  this.mapMetrics = metrics;
  this.reduceMetrics = rMetrics;
}
 
Example 4
Source Project: hadoop   Source File: TestFileQueue.java    License: Apache License 2.0
@Test
public void testRepeat() throws Exception {
  final Configuration conf = new Configuration();
  Arrays.fill(loc, "");
  Arrays.fill(start, 0L);
  Arrays.fill(len, BLOCK);

  final ByteArrayOutputStream out = fillVerif();
  final FileQueue q =
    new FileQueue(new CombineFileSplit(paths, start, len, loc), conf);
  final byte[] verif = out.toByteArray();
  final byte[] check = new byte[2 * NFILES * BLOCK];
  q.read(check, 0, NFILES * BLOCK);
  assertArrayEquals(verif, Arrays.copyOf(check, NFILES * BLOCK));

  final byte[] verif2 = new byte[2 * NFILES * BLOCK];
  System.arraycopy(verif, 0, verif2, 0, verif.length);
  System.arraycopy(verif, 0, verif2, verif.length, verif.length);
  q.read(check, 0, 2 * NFILES * BLOCK);
  assertArrayEquals(verif2, check);

}
 
Example 5
Source Project: hadoop   Source File: TestGridMixClasses.java    License: Apache License 2.0
private LoadSplit getLoadSplit() throws Exception {

    Path[] files = {new Path("one"), new Path("two")};
    long[] start = {1, 2};
    long[] lengths = {100, 200};
    String[] locations = {"locOne", "loctwo"};

    CombineFileSplit cfSplit = new CombineFileSplit(files, start, lengths,
            locations);
    ResourceUsageMetrics metrics = new ResourceUsageMetrics();
    metrics.setCumulativeCpuUsage(200);
    ResourceUsageMetrics[] rMetrics = {metrics};

    double[] reduceBytes = {8.1d, 8.2d};
    double[] reduceRecords = {9.1d, 9.2d};
    long[] reduceOutputBytes = {101L, 102L};
    long[] reduceOutputRecords = {111L, 112L};

    return new LoadSplit(cfSplit, 2, 1, 4L, 5L, 6L, 7L,
            reduceBytes, reduceRecords, reduceOutputBytes, reduceOutputRecords,
            metrics, rMetrics);
  }
 
Example 6
Source Project: gemfirexd-oss   Source File: RowRecordReader.java    License: Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException {
  Configuration conf = context.getConfiguration();
  CombineFileSplit cSplit =  (CombineFileSplit) split;
  Path[] path = cSplit.getPaths();
  long[] start = cSplit.getStartOffsets();
  long[] len = cSplit.getLengths();
  
  FileSystem fs = cSplit.getPath(0).getFileSystem(conf);
  
  long startTS = conf.getLong(RowInputFormat.START_TIME_MILLIS, 0L);
  long endTS = conf.getLong(RowInputFormat.END_TIME_MILLIS, 0L);
  this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, startTS, endTS);

  instantiateGfxdLoner(conf);
}
 
Example 7
Source Project: gemfirexd-oss   Source File: HDFSSplitIterator.java    License: Apache License 2.0
public HDFSSplitIterator(FileSystem fs, Path[] paths, long[] offsets, long[] lengths, long startTime, long endTime) throws IOException {
  this.fs = fs;
  this.split = new CombineFileSplit(paths, offsets, lengths, null);
  while(currentHopIndex < split.getNumPaths() && !fs.exists(split.getPath(currentHopIndex))){
    logger.warning(LocalizedStrings.HOPLOG_CLEANED_UP_BY_JANITOR, split.getPath(currentHopIndex));
    currentHopIndex++;
  }
  if(currentHopIndex == split.getNumPaths()){
    this.hoplog = null;
    iterator = null;
  } else {
    this.hoplog = getHoplog(fs,split.getPath(currentHopIndex));
    iterator = hoplog.getReader().scan(split.getOffset(currentHopIndex), split.getLength(currentHopIndex));
  }
  this.startTime = startTime;
  this.endTime = endTime;
}
 
Example 8
Source Project: kite   Source File: AbstractCombineFileRecordReader.java    License: Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  if (delegate != null) {
    delegate.close();
  }
  if (split instanceof CombineFileSplit) {
    CombineFileSplit combineSplit = (CombineFileSplit) split;
    FileSplit fileSplit = new FileSplit(combineSplit.getPath(idx), combineSplit.getOffset(idx),
        combineSplit.getLength(idx), combineSplit.getLocations());
    delegate = getInputFormat().createRecordReader(fileSplit, context);
    delegate.initialize(fileSplit, context);
  } else {
    throw new DatasetOperationException(
        "Split is not a CombineFileSplit: %s:%s",
        split.getClass().getCanonicalName(), split);
  }
}
 
Example 9
Source Project: big-c   Source File: TestFileQueue.java    License: Apache License 2.0
@Test
public void testUneven() throws Exception {
  final Configuration conf = new Configuration();
  Arrays.fill(loc, "");
  Arrays.fill(start, 0L);
  Arrays.fill(len, BLOCK);

  final int B2 = BLOCK / 2;
  for (int i = 0; i < NFILES; i += 2) {
    start[i] += B2;
    len[i] -= B2;
  }
  final FileQueue q =
    new FileQueue(new CombineFileSplit(paths, start, len, loc), conf);
  final ByteArrayOutputStream out = fillVerif();
  final byte[] verif = out.toByteArray();
  final byte[] check = new byte[NFILES / 2 * BLOCK + NFILES / 2 * B2];
  q.read(check, 0, verif.length);
  assertArrayEquals(verif, Arrays.copyOf(check, verif.length));
  q.read(check, 0, verif.length);
  assertArrayEquals(verif, Arrays.copyOf(check, verif.length));
}
 
Example 10
Source Project: Halyard   Source File: HalyardBulkLoad.java    License: Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = super.getSplits(job);
    long maxSize = MAX_SINGLE_FILE_MULTIPLIER * job.getConfiguration().getLong("mapreduce.input.fileinputformat.split.maxsize", 0);
    if (maxSize > 0) {
        List<InputSplit> newSplits = new ArrayList<>();
        for (InputSplit spl : splits) {
            CombineFileSplit cfs = (CombineFileSplit)spl;
            for (int i=0; i<cfs.getNumPaths(); i++) {
                long length = cfs.getLength(i); // per-file length, not the split total
                if (length > maxSize) {
                    int replicas = (int)Math.ceil((double)length / (double)maxSize);
                    Path path = cfs.getPath(i);
                    for (int r=1; r<replicas; r++) {
                        newSplits.add(new CombineFileSplit(new Path[]{path}, new long[]{r}, new long[]{length}, cfs.getLocations()));
                    }
                }
            }
        }
        splits.addAll(newSplits);
    }
    return splits;
}
 
Example 11
Source Project: Cubert   Source File: CombinedFileRecordReader.java    License: Apache License 2.0
public CombinedFileRecordReader(InputFormat<K, V> inputFormat,
                                CombineFileSplit combineFileSplit,
                                TaskAttemptContext context)
{
    this.inputFormat = inputFormat;
    this.combineFileSplit = combineFileSplit;
    this.context = context;

    long[] lengths = combineFileSplit.getLengths();
    long totalLength = 0;
    for (long length : lengths)
        totalLength += length;
    fractionLength = new float[lengths.length];
    for (int i = 0; i < lengths.length; i++)
        fractionLength[i] = ((float) lengths[i]) / totalLength;
}
 
Example 12
Source Project: incubator-gobblin   Source File: AvroKeyMapper.java    License: Apache License 2.0
@Override
protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
    throws IOException, InterruptedException {
  if (context.getNumReduceTasks() == 0) {
    context.write(key, NullWritable.get());
  } else {
    populateComparableKeyRecord(key.datum(), this.outKey.datum());
    this.outValue.datum(key.datum());
    try {
      context.write(this.outKey, this.outValue);
    } catch (AvroRuntimeException e) {
      final Path[] paths = ((CombineFileSplit) context.getInputSplit()).getPaths();
      throw new IOException("Unable to process paths " + StringUtils.join(paths, ','), e);
    }
  }
  context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
}
 
Example 13
/**
 * Set the number of locations in the split to SPLIT_MAX_NUM_LOCATIONS if it is larger than
 * SPLIT_MAX_NUM_LOCATIONS (MAPREDUCE-5186).
 */
private static List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
  if (VersionInfo.getVersion().compareTo("2.3.0") >= 0) {
    // This issue was fixed in 2.3.0, if newer version, no need to clean up splits
    return splits;
  }

  List<InputSplit> cleanedSplits = Lists.newArrayList();

  for (int i = 0; i < splits.size(); i++) {
    CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
    String[] locations = oldSplit.getLocations();

    Preconditions.checkNotNull(locations, "CombineFileSplit.getLocations() returned null");

    if (locations.length > SPLIT_MAX_NUM_LOCATIONS) {
      locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
    }

    cleanedSplits.add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(), oldSplit.getLengths(),
        locations));
  }
  return cleanedSplits;
}
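Since cleanSplits() is a private helper, here is a brief hypothetical sketch of where it slots in: immediately after the superclass computes the combined splits. It assumes the cleanSplits helper shown above (with its SPLIT_MAX_NUM_LOCATIONS constant) is copied into this class.

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat;

public class CleanedCombineTextInputFormat extends CombineTextInputFormat {
  @Override
  public List<InputSplit> getSplits(JobContext job) throws IOException {
    // Cap each split's location count before returning splits to the framework.
    return cleanSplits(super.getSplits(job));
  }
}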
 
Example 14
Source Project: hraven   Source File: CombineFileInputFormat.java    License: Apache License 2.0
/**
 * Create a single split from the list of blocks specified in validBlocks
 * Add this new split into splitList.
 */
private void addCreatedSplit(List<InputSplit> splitList, 
                             List<String> locations, 
                             ArrayList<OneBlockInfo> validBlocks) {
  // create an input split
  Path[] fl = new Path[validBlocks.size()];
  long[] offset = new long[validBlocks.size()];
  long[] length = new long[validBlocks.size()];
  for (int i = 0; i < validBlocks.size(); i++) {
    fl[i] = validBlocks.get(i).onepath; 
    offset[i] = validBlocks.get(i).offset;
    length[i] = validBlocks.get(i).length;
  }

   // add this split to the list that is returned
  CombineFileSplit thissplit = new CombineFileSplit(fl, offset, 
                                 length, locations.toArray(new String[0]));
  splitList.add(thissplit); 
}
 
Example 15
Source Project: kite   Source File: AbstractKiteCombineFileInputFormat.java    License: Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> kiteCombineSplits = Lists.newArrayList();
  for (InputSplit inputSplit : super.getSplits(job)) {
    kiteCombineSplits.add(new KiteCombineFileSplit((CombineFileSplit) inputSplit));
  }
  return kiteCombineSplits;
}
 
Example 16
public MDSCombineSpreadReader( final CombineFileSplit split , final TaskAttemptContext context , final Integer index ) throws IOException{
  Configuration config = context.getConfiguration();
  Path path = split.getPath( index );
  FileSystem fs = path.getFileSystem( config );
  long fileLength = fs.getLength( path );
  InputStream in = fs.open( path );

  innerReader = new MDSSpreadReader();
  innerReader.setStream( in , fileLength , 0 , fileLength );
}
 
Example 17
Source Project: hadoop   Source File: FileQueue.java    License: Apache License 2.0
/**
 * @param split Description of input sources.
 * @param conf Used to resolve FileSystem instances.
 */
public FileQueue(CombineFileSplit split, Configuration conf)
    throws IOException {
  this.conf = conf;
  paths = split.getPaths();
  startoffset = split.getStartOffsets();
  lengths = split.getLengths();
  nextSource();
}
 
Example 18
Source Project: hadoop   Source File: TestFileQueue.java    License: Apache License 2.0
@Test
public void testEmpty() throws Exception {
  final Configuration conf = new Configuration();
  // verify OK if unused
  final FileQueue q = new FileQueue(new CombineFileSplit(
        new Path[0], new long[0], new long[0], new String[0]), conf);
}
 
Example 19
Source Project: hadoop   Source File: TestGridMixClasses.java    License: Apache License 2.0
@Test (timeout=1000)
public void testGridmixSplit() throws Exception {
  Path[] files = {new Path("one"), new Path("two")};
  long[] start = {1, 2};
  long[] lengths = {100, 200};
  String[] locations = {"locOne", "loctwo"};

  CombineFileSplit cfSplit = new CombineFileSplit(files, start, lengths,
          locations);
  ResourceUsageMetrics metrics = new ResourceUsageMetrics();
  metrics.setCumulativeCpuUsage(200);

  double[] reduceBytes = {8.1d, 8.2d};
  double[] reduceRecords = {9.1d, 9.2d};
  long[] reduceOutputBytes = {101L, 102L};
  long[] reduceOutputRecords = {111L, 112L};

  GridmixSplit test = new GridmixSplit(cfSplit, 2, 3, 4L, 5L, 6L, 7L,
          reduceBytes, reduceRecords, reduceOutputBytes, reduceOutputRecords);

  ByteArrayOutputStream data = new ByteArrayOutputStream();
  DataOutputStream out = new DataOutputStream(data);
  test.write(out);
  GridmixSplit copy = new GridmixSplit();
  copy.readFields(new DataInputStream(new ByteArrayInputStream(data
          .toByteArray())));

  // data should be the same
  assertEquals(test.getId(), copy.getId());
  assertEquals(test.getMapCount(), copy.getMapCount());
  assertEquals(test.getInputRecords(), copy.getInputRecords());

  assertEquals(test.getOutputBytes()[0], copy.getOutputBytes()[0]);
  assertEquals(test.getOutputRecords()[0], copy.getOutputRecords()[0]);
  assertEquals(test.getReduceBytes(0), copy.getReduceBytes(0));
  assertEquals(test.getReduceRecords(0), copy.getReduceRecords(0));

}
 
Example 20
Source Project: hadoop   Source File: TestFilePool.java    License: Apache License 2.0
void checkSplitEq(FileSystem fs, CombineFileSplit split, long bytes)
    throws Exception {
  long splitBytes = 0L;
  HashSet<Path> uniq = new HashSet<Path>();
  for (int i = 0; i < split.getNumPaths(); ++i) {
    splitBytes += split.getLength(i);
    assertTrue(
        split.getLength(i) <= fs.getFileStatus(split.getPath(i)).getLen());
    assertFalse(uniq.contains(split.getPath(i)));
    uniq.add(split.getPath(i));
  }
  assertEquals(bytes, splitBytes);
}
 
Example 21
Source Project: gemfirexd-oss   Source File: AbstractGFRecordReader.java    License: Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
throws IOException, InterruptedException {
  CombineFileSplit cSplit = (CombineFileSplit) split;
  Path[] path = cSplit.getPaths();
  long[] start = cSplit.getStartOffsets();
  long[] len = cSplit.getLengths();

  Configuration conf = context.getConfiguration();
  FileSystem fs = cSplit.getPath(0).getFileSystem(conf);
  
  this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, 0L, 0L);
}
 
Example 22
Source Project: gemfirexd-oss   Source File: GFInputFormatJUnitTest.java    License: Apache License 2.0
public void testHfileSplitCompleteness() throws Exception {
  cluster = initMiniCluster(CLUSTER_PORT, 1);

  int count = 40;
  HdfsSortedOplogOrganizer bucket1 = new HdfsSortedOplogOrganizer(
      regionManager, 1);
  ArrayList<TestEvent> items = new ArrayList<TestEvent>();
  for (int i = 0; i < count; i++) {
    items.add(new TestEvent(("key-" + i), ("value-" + System.nanoTime())));
  }
  bucket1.flush(items.iterator(), count);

  Configuration conf = hdfsStore.getFileSystem().getConf();
  GFInputFormat gfInputFormat = new GFInputFormat();
  Job job = Job.getInstance(conf, "test");

  conf = job.getConfiguration();
  conf.set(GFInputFormat.INPUT_REGION, getName());
  conf.set(GFInputFormat.HOME_DIR, testDataDir.getName());
  conf.setBoolean(GFInputFormat.CHECKPOINT, false);

  List<InputSplit> splits = gfInputFormat.getSplits(job);
  assertTrue(1 < splits.size());

  long lastBytePositionOfPrevious = 0;
  for (InputSplit inputSplit : splits) {
    CombineFileSplit split = (CombineFileSplit) inputSplit;
    assertEquals(1, split.getPaths().length);
    assertEquals(lastBytePositionOfPrevious, split.getOffset(0));
    lastBytePositionOfPrevious += split.getLength();
    assertEquals(1, split.getLocations().length);
  }

  Path bucketPath = new Path(regionPath, "1");
  Path hopPath = new Path(bucketPath, bucket1.getSortedOplogs().iterator()
      .next().get().getFileName());
  FileStatus status = hdfsStore.getFileSystem().getFileStatus(hopPath);
  assertEquals(status.getLen(), lastBytePositionOfPrevious);
}
 