org.apache.hadoop.mapreduce.lib.input.CombineFileSplit Java Examples
The following examples show how to use
org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.
The source project, file, and license for each example are noted above the code.
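Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the paths and hostnames are made up) that constructs a CombineFileSplit and reads back its per-file and aggregate fields:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class CombineFileSplitDemo {
  public static void main(String[] args) throws Exception {
    // A CombineFileSplit packs several file ranges into one logical split.
    Path[] paths = { new Path("/data/a.txt"), new Path("/data/b.txt") };
    long[] startOffsets = { 0L, 0L };
    long[] lengths = { 1024L, 2048L };
    String[] locations = { "host1", "host2" };

    CombineFileSplit split =
        new CombineFileSplit(paths, startOffsets, lengths, locations);

    // Per-file accessors take an index; aggregate accessors cover the whole split.
    for (int i = 0; i < split.getNumPaths(); i++) {
      System.out.println(split.getPath(i) + " offset=" + split.getOffset(i)
          + " length=" + split.getLength(i));
    }
    System.out.println("total bytes = " + split.getLength());
  }
}

The indexed accessors (getPath, getOffset, getLength) recur throughout the examples below.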
Example #1
Source File: MultiFileWordCount.java From big-c with Apache License 2.0
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;

  // open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {
    // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
        (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
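The (CombineFileSplit, TaskAttemptContext, Integer) constructor shape above is not arbitrary: CombineFileRecordReader instantiates one such reader per file in the split via reflection, passing each reader its file index. The following is a hedged, self-contained sketch of that wiring; MyCombineInputFormat and PerFileReader are illustrative names (MultiFileWordCount's real input format uses its own key type), and the per-file reader simply delegates to a plain LineRecordReader:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

public class MyCombineInputFormat extends CombineFileInputFormat<LongWritable, Text> {

  @Override
  public RecordReader<LongWritable, Text> createRecordReader(InputSplit split,
      TaskAttemptContext context) throws IOException {
    // CombineFileRecordReader builds one PerFileReader per file in the split,
    // invoking the (CombineFileSplit, TaskAttemptContext, Integer) constructor
    // reflectively with the file's index.
    return new CombineFileRecordReader<>((CombineFileSplit) split, context,
        PerFileReader.class);
  }

  // Minimal per-file reader: carves the indexed file out of the combined
  // split as a FileSplit and delegates to a standard LineRecordReader.
  public static class PerFileReader extends RecordReader<LongWritable, Text> {
    private final LineRecordReader delegate = new LineRecordReader();
    private final FileSplit fileSplit;

    public PerFileReader(CombineFileSplit split, TaskAttemptContext context,
        Integer index) throws IOException {
      fileSplit = new FileSplit(split.getPath(index), split.getOffset(index),
          split.getLength(index), split.getLocations());
    }

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
      delegate.initialize(fileSplit, context);
    }

    @Override public boolean nextKeyValue() throws IOException { return delegate.nextKeyValue(); }
    @Override public LongWritable getCurrentKey() { return delegate.getCurrentKey(); }
    @Override public Text getCurrentValue() { return delegate.getCurrentValue(); }
    @Override public float getProgress() throws IOException { return delegate.getProgress(); }
    @Override public void close() throws IOException { delegate.close(); }
  }
}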
Example #2
Source File: RowRecordReader.java From gemfirexd-oss with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  CombineFileSplit cSplit = (CombineFileSplit) split;
  Path[] path = cSplit.getPaths();
  long[] start = cSplit.getStartOffsets();
  long[] len = cSplit.getLengths();
  FileSystem fs = cSplit.getPath(0).getFileSystem(conf);

  long startTS = conf.getLong(RowInputFormat.START_TIME_MILLIS, 0L);
  long endTS = conf.getLong(RowInputFormat.END_TIME_MILLIS, 0L);
  this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, startTS, endTS);
  instantiateGfxdLoner(conf);
}
Example #3
Source File: HalyardBulkLoad.java From Halyard with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> splits = super.getSplits(job);
  long maxSize = MAX_SINGLE_FILE_MULTIPLIER
      * job.getConfiguration().getLong("mapreduce.input.fileinputformat.split.maxsize", 0);
  if (maxSize > 0) {
    List<InputSplit> newSplits = new ArrayList<>();
    for (InputSplit spl : splits) {
      CombineFileSplit cfs = (CombineFileSplit) spl;
      for (int i = 0; i < cfs.getNumPaths(); i++) {
        long length = cfs.getLength();
        if (length > maxSize) {
          int replicas = (int) Math.ceil((double) length / (double) maxSize);
          Path path = cfs.getPath(i);
          for (int r = 1; r < replicas; r++) {
            newSplits.add(new CombineFileSplit(new Path[]{path}, new long[]{r},
                new long[]{length}, cfs.getLocations()));
          }
        }
      }
    }
    splits.addAll(newSplits);
  }
  return splits;
}
Example #4
Source File: AvroKeyMapper.java From incubator-gobblin with Apache License 2.0
@Override
protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
    throws IOException, InterruptedException {
  if (context.getNumReduceTasks() == 0) {
    context.write(key, NullWritable.get());
  } else {
    populateComparableKeyRecord(key.datum(), this.outKey.datum());
    this.outValue.datum(key.datum());
    try {
      context.write(this.outKey, this.outValue);
    } catch (AvroRuntimeException e) {
      final Path[] paths = ((CombineFileSplit) context.getInputSplit()).getPaths();
      throw new IOException("Unable to process paths " + StringUtils.join(paths, ','), e);
    }
  }
  context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
}
Example #5
Source File: CombinedFileRecordReader.java From Cubert with Apache License 2.0
public CombinedFileRecordReader(InputFormat<K, V> inputFormat,
    CombineFileSplit combineFileSplit, TaskAttemptContext context) {
  this.inputFormat = inputFormat;
  this.combineFileSplit = combineFileSplit;
  this.context = context;

  long[] lengths = combineFileSplit.getLengths();
  long totalLength = 0;
  for (long length : lengths)
    totalLength += length;
  fractionLength = new float[lengths.length];
  for (int i = 0; i < lengths.length; i++)
    fractionLength[i] = ((float) lengths[i]) / totalLength;
}
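Precomputing each file's share of the total split makes a weighted overall progress figure cheap to derive. A minimal sketch of that idea (an illustration only, not code quoted from Cubert):

// Illustrative helper: files before the current one count fully toward
// progress; the current file contributes its own fractional progress, weighted.
static float combinedProgress(float[] fractionLength, int currentFileIndex,
    float currentReaderProgress) {
  float progress = 0f;
  for (int i = 0; i < currentFileIndex; i++) {
    progress += fractionLength[i];  // fully consumed files
  }
  if (currentFileIndex < fractionLength.length) {
    progress += fractionLength[currentFileIndex] * currentReaderProgress;
  }
  return progress;
}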
Example #6
Source File: HDFSSplitIterator.java From gemfirexd-oss with Apache License 2.0
public HDFSSplitIterator(FileSystem fs, Path[] paths, long[] offsets, long[] lengths,
    long startTime, long endTime) throws IOException {
  this.fs = fs;
  this.split = new CombineFileSplit(paths, offsets, lengths, null);

  while (currentHopIndex < split.getNumPaths() && !fs.exists(split.getPath(currentHopIndex))) {
    logger.warning(LocalizedStrings.HOPLOG_CLEANED_UP_BY_JANITOR,
        split.getPath(currentHopIndex));
    currentHopIndex++;
  }

  if (currentHopIndex == split.getNumPaths()) {
    this.hoplog = null;
    iterator = null;
  } else {
    this.hoplog = getHoplog(fs, split.getPath(currentHopIndex));
    iterator = hoplog.getReader().scan(split.getOffset(currentHopIndex),
        split.getLength(currentHopIndex));
  }
  this.startTime = startTime;
  this.endTime = endTime;
}
Example #7
Source File: CombineFileInputFormat.java From hraven with Apache License 2.0
/**
 * Create a single split from the list of blocks specified in validBlocks.
 * Add this new split into splitList.
 */
private void addCreatedSplit(List<InputSplit> splitList, List<String> locations,
    ArrayList<OneBlockInfo> validBlocks) {
  // create an input split
  Path[] fl = new Path[validBlocks.size()];
  long[] offset = new long[validBlocks.size()];
  long[] length = new long[validBlocks.size()];
  for (int i = 0; i < validBlocks.size(); i++) {
    fl[i] = validBlocks.get(i).onepath;
    offset[i] = validBlocks.get(i).offset;
    length[i] = validBlocks.get(i).length;
  }
  // add this split to the list that is returned
  CombineFileSplit thissplit = new CombineFileSplit(fl, offset, length,
      locations.toArray(new String[0]));
  splitList.add(thissplit);
}
Example #8
Source File: GridmixSplit.java From hadoop with Apache License 2.0
public GridmixSplit(CombineFileSplit cfsplit, int maps, int id, long inputBytes,
    long inputRecords, long outputBytes, long outputRecords, double[] reduceBytes,
    double[] reduceRecords, long[] reduceOutputBytes, long[] reduceOutputRecords)
    throws IOException {
  super(cfsplit);
  this.id = id;
  this.maps = maps;
  reduces = reduceBytes.length;
  this.inputRecords = inputRecords;
  this.outputBytes = outputBytes;
  this.outputRecords = outputRecords;
  this.reduceBytes = reduceBytes;
  this.reduceRecords = reduceRecords;
  nSpec = reduceOutputBytes.length;
  this.reduceOutputBytes = reduceOutputBytes;
  this.reduceOutputRecords = reduceOutputRecords;
}
Example #9
Source File: CompactionCombineFileInputFormat.java From incubator-gobblin with Apache License 2.0
/**
 * Set the number of locations in the split to SPLIT_MAX_NUM_LOCATIONS if it is
 * larger than SPLIT_MAX_NUM_LOCATIONS (MAPREDUCE-5186).
 */
private static List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
  if (VersionInfo.getVersion().compareTo("2.3.0") >= 0) {
    // This issue was fixed in 2.3.0; if newer version, no need to clean up splits
    return splits;
  }

  List<InputSplit> cleanedSplits = Lists.newArrayList();
  for (int i = 0; i < splits.size(); i++) {
    CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
    String[] locations = oldSplit.getLocations();

    Preconditions.checkNotNull(locations, "CombineFileSplit.getLocations() returned null");

    if (locations.length > SPLIT_MAX_NUM_LOCATIONS) {
      locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
    }

    cleanedSplits.add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(),
        oldSplit.getLengths(), locations));
  }
  return cleanedSplits;
}
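A plausible call site for cleanSplits, assuming the enclosing input format applies it to the superclass's splits (hypothetical wiring, not quoted from Gobblin):

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  // Trim oversized location arrays before handing splits to the framework.
  return cleanSplits(super.getSplits(job));
}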
Example #10
Source File: LoadSplit.java From hadoop with Apache License 2.0
public LoadSplit(CombineFileSplit cfsplit, int maps, int id, long inputBytes,
    long inputRecords, long outputBytes, long outputRecords, double[] reduceBytes,
    double[] reduceRecords, long[] reduceOutputBytes, long[] reduceOutputRecords,
    ResourceUsageMetrics metrics, ResourceUsageMetrics[] rMetrics) throws IOException {
  super(cfsplit);
  this.id = id;
  this.maps = maps;
  reduces = reduceBytes.length;
  this.inputRecords = inputRecords;
  this.outputBytes = outputBytes;
  this.outputRecords = outputRecords;
  this.reduceBytes = reduceBytes;
  this.reduceRecords = reduceRecords;
  nSpec = reduceOutputBytes.length;
  this.reduceOutputBytes = reduceOutputBytes;
  this.reduceOutputRecords = reduceOutputRecords;
  this.mapMetrics = metrics;
  this.reduceMetrics = rMetrics;
}
Example #11
Source File: TestFileQueue.java From hadoop with Apache License 2.0
@Test
public void testRepeat() throws Exception {
  final Configuration conf = new Configuration();
  Arrays.fill(loc, "");
  Arrays.fill(start, 0L);
  Arrays.fill(len, BLOCK);

  final ByteArrayOutputStream out = fillVerif();
  final FileQueue q = new FileQueue(new CombineFileSplit(paths, start, len, loc), conf);
  final byte[] verif = out.toByteArray();
  final byte[] check = new byte[2 * NFILES * BLOCK];
  q.read(check, 0, NFILES * BLOCK);
  assertArrayEquals(verif, Arrays.copyOf(check, NFILES * BLOCK));

  final byte[] verif2 = new byte[2 * NFILES * BLOCK];
  System.arraycopy(verif, 0, verif2, 0, verif.length);
  System.arraycopy(verif, 0, verif2, verif.length, verif.length);
  q.read(check, 0, 2 * NFILES * BLOCK);
  assertArrayEquals(verif2, check);
}
Example #12
Source File: TestGridMixClasses.java From big-c with Apache License 2.0
private LoadSplit getLoadSplit() throws Exception {
  Path[] files = {new Path("one"), new Path("two")};
  long[] start = {1, 2};
  long[] lengths = {100, 200};
  String[] locations = {"locOne", "loctwo"};

  CombineFileSplit cfSplit = new CombineFileSplit(files, start, lengths, locations);
  ResourceUsageMetrics metrics = new ResourceUsageMetrics();
  metrics.setCumulativeCpuUsage(200);
  ResourceUsageMetrics[] rMetrics = {metrics};

  double[] reduceBytes = {8.1d, 8.2d};
  double[] reduceRecords = {9.1d, 9.2d};
  long[] reduceOutputBytes = {101L, 102L};
  long[] reduceOutputRecords = {111L, 112L};

  return new LoadSplit(cfSplit, 2, 1, 4L, 5L, 6L, 7L, reduceBytes, reduceRecords,
      reduceOutputBytes, reduceOutputRecords, metrics, rMetrics);
}
Example #13
Source File: TestFileQueue.java From big-c with Apache License 2.0
@Test
public void testUneven() throws Exception {
  final Configuration conf = new Configuration();
  Arrays.fill(loc, "");
  Arrays.fill(start, 0L);
  Arrays.fill(len, BLOCK);

  final int B2 = BLOCK / 2;
  for (int i = 0; i < NFILES; i += 2) {
    start[i] += B2;
    len[i] -= B2;
  }
  final FileQueue q = new FileQueue(new CombineFileSplit(paths, start, len, loc), conf);
  final ByteArrayOutputStream out = fillVerif();
  final byte[] verif = out.toByteArray();
  final byte[] check = new byte[NFILES / 2 * BLOCK + NFILES / 2 * B2];
  q.read(check, 0, verif.length);
  assertArrayEquals(verif, Arrays.copyOf(check, verif.length));
  q.read(check, 0, verif.length);
  assertArrayEquals(verif, Arrays.copyOf(check, verif.length));
}
Example #14
Source File: AbstractCombineFileRecordReader.java From kite with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  if (delegate != null) {
    delegate.close();
  }
  if (split instanceof CombineFileSplit) {
    CombineFileSplit combineSplit = (CombineFileSplit) split;
    FileSplit fileSplit = new FileSplit(combineSplit.getPath(idx),
        combineSplit.getOffset(idx), combineSplit.getLength(idx),
        combineSplit.getLocations());
    delegate = getInputFormat().createRecordReader(fileSplit, context);
    delegate.initialize(fileSplit, context);
  } else {
    throw new DatasetOperationException(
        "Split is not a CombineFileSplit: %s:%s",
        split.getClass().getCanonicalName(), split);
  }
}
Example #15
Source File: AvroKeyCombineFileRecordReader.java From incubator-gobblin with Apache License 2.0
private static Schema getSchema(CombineFileSplit split, TaskAttemptContext cx, Integer idx)
    throws IOException {
  Schema schema = AvroJob.getInputKeySchema(cx.getConfiguration());
  if (schema != null) {
    return schema;
  }

  Path path = split.getPath(idx);
  FileSystem fs = path.getFileSystem(cx.getConfiguration());
  return AvroUtils.getSchemaFromDataFile(path, fs);
}
Example #16
Source File: FileQueue.java From big-c with Apache License 2.0
/**
 * @param split Description of input sources.
 * @param conf Used to resolve FileSystem instances.
 */
public FileQueue(CombineFileSplit split, Configuration conf) throws IOException {
  this.conf = conf;
  paths = split.getPaths();
  startoffset = split.getStartOffsets();
  lengths = split.getLengths();
  nextSource();
}
Example #17
Source File: HalyardBulkLoad.java From Halyard with Apache License 2.0
public ParserPump(CombineFileSplit split, TaskAttemptContext context) {
  this.context = context;
  this.paths = split.getPaths();
  this.sizes = split.getLengths();
  this.offsets = split.getStartOffsets();
  this.size = split.getLength();

  Configuration conf = context.getConfiguration();
  this.skipInvalid = conf.getBoolean(SKIP_INVALID_PROPERTY, false);
  this.verifyDataTypeValues = conf.getBoolean(VERIFY_DATATYPE_VALUES_PROPERTY, false);
  this.overrideRdfContext = conf.getBoolean(OVERRIDE_CONTEXT_PROPERTY, false);
  this.defaultRdfContextPattern = conf.get(DEFAULT_CONTEXT_PROPERTY);
  this.maxSize = MAX_SINGLE_FILE_MULTIPLIER
      * conf.getLong("mapreduce.input.fileinputformat.split.maxsize", 0);
}
Example #18
Source File: TestGridMixClasses.java From big-c with Apache License 2.0
@Test(timeout = 1000)
public void testGridmixSplit() throws Exception {
  Path[] files = {new Path("one"), new Path("two")};
  long[] start = {1, 2};
  long[] lengths = {100, 200};
  String[] locations = {"locOne", "loctwo"};

  CombineFileSplit cfSplit = new CombineFileSplit(files, start, lengths, locations);
  ResourceUsageMetrics metrics = new ResourceUsageMetrics();
  metrics.setCumulativeCpuUsage(200);

  double[] reduceBytes = {8.1d, 8.2d};
  double[] reduceRecords = {9.1d, 9.2d};
  long[] reduceOutputBytes = {101L, 102L};
  long[] reduceOutputRecords = {111L, 112L};

  GridmixSplit test = new GridmixSplit(cfSplit, 2, 3, 4L, 5L, 6L, 7L,
      reduceBytes, reduceRecords, reduceOutputBytes, reduceOutputRecords);

  ByteArrayOutputStream data = new ByteArrayOutputStream();
  DataOutputStream out = new DataOutputStream(data);
  test.write(out);
  GridmixSplit copy = new GridmixSplit();
  copy.readFields(new DataInputStream(new ByteArrayInputStream(data.toByteArray())));

  // data should be the same
  assertEquals(test.getId(), copy.getId());
  assertEquals(test.getMapCount(), copy.getMapCount());
  assertEquals(test.getInputRecords(), copy.getInputRecords());
  assertEquals(test.getOutputBytes()[0], copy.getOutputBytes()[0]);
  assertEquals(test.getOutputRecords()[0], copy.getOutputRecords()[0]);
  assertEquals(test.getReduceBytes(0), copy.getReduceBytes(0));
  assertEquals(test.getReduceRecords(0), copy.getReduceRecords(0));
}
Example #19
Source File: AbstractGFRecordReader.java From gemfirexd-oss with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CombineFileSplit cSplit = (CombineFileSplit) split;
  Path[] path = cSplit.getPaths();
  long[] start = cSplit.getStartOffsets();
  long[] len = cSplit.getLengths();
  Configuration conf = context.getConfiguration();
  FileSystem fs = cSplit.getPath(0).getFileSystem(conf);
  this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, 0L, 0L);
}
Example #20
Source File: TestFileQueue.java From big-c with Apache License 2.0
@Test
public void testEmpty() throws Exception {
  final Configuration conf = new Configuration();
  // verify OK if unused
  final FileQueue q = new FileQueue(
      new CombineFileSplit(new Path[0], new long[0], new long[0], new String[0]), conf);
}
Example #21
Source File: CsvBlurDriver.java From incubator-retired-blur with Apache License 2.0
@SuppressWarnings("unused") public SequenceFileRecordReaderWrapper(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException { fileSplit = new FileSplit(split.getPath(index), split.getOffset(index), split.getLength(index), split.getLocations()); delegate = new SequenceFileInputFormat<Writable, Text>().createRecordReader(fileSplit, context); }
Example #22
Source File: TestFilePool.java From big-c with Apache License 2.0
void checkSplitEq(FileSystem fs, CombineFileSplit split, long bytes) throws Exception {
  long splitBytes = 0L;
  HashSet<Path> uniq = new HashSet<Path>();
  for (int i = 0; i < split.getNumPaths(); ++i) {
    splitBytes += split.getLength(i);
    assertTrue(split.getLength(i) <= fs.getFileStatus(split.getPath(i)).getLen());
    assertFalse(uniq.contains(split.getPath(i)));
    uniq.add(split.getPath(i));
  }
  assertEquals(bytes, splitBytes);
}