org.apache.hadoop.mapreduce.InputSplit Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.InputSplit. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
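Before looking at the examples, it helps to recall what the class itself requires: org.apache.hadoop.mapreduce.InputSplit is an abstract class whose concrete subclasses must implement getLength() (a size hint used to order splits) and getLocations() (locality hints), and they are usually also made Writable so the framework can serialize them out to the map tasks. The sketch below is a minimal, hypothetical split over a range of row ids; the class name RowRangeSplit and its fields are illustrative and do not come from any of the projects listed here.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;

// Hypothetical split describing the half-open row range [firstRow, firstRow + rowCount).
public class RowRangeSplit extends InputSplit implements Writable {
  private long firstRow;
  private long rowCount;

  public RowRangeSplit() { }                 // no-arg constructor required for deserialization

  public RowRangeSplit(long firstRow, long rowCount) {
    this.firstRow = firstRow;
    this.rowCount = rowCount;
  }

  @Override
  public long getLength() {                  // relative size, used by the framework for ordering
    return rowCount;
  }

  @Override
  public String[] getLocations() {           // no locality preference for generated data
    return new String[0];
  }

  @Override
  public void write(DataOutput out) throws IOException {
    out.writeLong(firstRow);
    out.writeLong(rowCount);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    firstRow = in.readLong();
    rowCount = in.readLong();
  }
}

Several of the examples below (TeraSortIngest, TeraGen, NetezzaExternalTableInputFormat, NMapInputFormat) build lists of splits of exactly this kind inside an InputFormat's getSplits() method.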
Example #1
Source File: TeraSortIngest.java    From accumulo-examples with Apache License 2.0
/**
 * Create the desired number of splits, dividing the number of rows between the mappers.
 */
@Override
public List<InputSplit> getSplits(JobContext job) {
  long totalRows = job.getConfiguration().getLong(NUMROWS, 0);
  int numSplits = job.getConfiguration().getInt(NUMSPLITS, 1);
  long rowsPerSplit = totalRows / numSplits;
  log.info(
      "Generating " + totalRows + " using " + numSplits + " maps with step of " + rowsPerSplit);
  ArrayList<InputSplit> splits = new ArrayList<>(numSplits);
  long currentRow = 0;
  for (int split = 0; split < numSplits - 1; ++split) {
    splits.add(new RangeInputSplit(currentRow, rowsPerSplit));
    currentRow += rowsPerSplit;
  }
  splits.add(new RangeInputSplit(currentRow, totalRows - currentRow));
  log.info("Done Generating.");
  return splits;
}
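A detail worth noticing in the loop above: it creates numSplits - 1 splits of exactly rowsPerSplit rows, and the final split absorbs whatever remainder integer division left over. A tiny standalone check of that arithmetic, with made-up numbers:

public class SplitMathCheck {
  public static void main(String[] args) {
    long totalRows = 10;
    int numSplits = 3;
    long rowsPerSplit = totalRows / numSplits;          // integer division: 3
    long currentRow = (numSplits - 1) * rowsPerSplit;   // 6 rows assigned to the first two splits
    System.out.println("last split gets " + (totalRows - currentRow) + " rows"); // prints 4
  }
}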
 
Example #2
Source File: SMInputFormat.java    From spliceengine with GNU Affero General Public License v3.0
public SMRecordReaderImpl getRecordReader(InputSplit split, Configuration config) throws IOException,
        InterruptedException {
    config.addResource(conf);
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "getRecordReader with table=%s, inputTable=%s," +
                "conglomerate=%s",
                table,
                config.get(TableInputFormat.INPUT_TABLE),
                config.get(MRConstants.SPLICE_INPUT_CONGLOMERATE));
    rr = new SMRecordReaderImpl(conf);
    if(table == null){
        TableName tableInfo = TableName.valueOf(config.get(TableInputFormat.INPUT_TABLE));
        PartitionFactory tableFactory=SIDriver.driver().getTableFactory();
        table = ((ClientPartition)tableFactory.getTable(tableInfo)).unwrapDelegate();
    }
    rr.setHTable(table);
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "returning record reader");
    return rr;
}
 
Example #3
Source File: GenerateData.java    From hadoop with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
    new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
    jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}
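The regular expression above relies on active tracker names having the form tracker_&lt;host&gt;:&lt;address info&gt;, capturing only the host part before the first colon; non-matching names are skipped. A small standalone illustration (the sample tracker string is invented):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class TrackerNameDemo {
  public static void main(String[] args) {
    Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
    Matcher m = trackerPattern.matcher("tracker_node-17.example.com:localhost/127.0.0.1:40010");
    if (m.find()) {
      System.out.println(m.group(1)); // prints node-17.example.com
    }
  }
}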
 
Example #4
Source File: DelimitedTextReader.java    From marklogic-contentpump with Apache License 2.0
protected void initParser(InputSplit inSplit) throws IOException,
    InterruptedException {
    fileIn = openFile(inSplit, true);
    if (fileIn == null) {
        return;
    }
    instream = new InputStreamReader(fileIn, encoding);

    bytesRead = 0;
    fileLen = inSplit.getLength();
    if (uriName == null) {
        generateId = conf.getBoolean(CONF_INPUT_GENERATE_URI, false);
        if (generateId) {
            idGen = new IdGenerator(file.toUri().getPath() + "-"
                + ((FileSplit) inSplit).getStart());
        } else {
            uriId = 0;
        }
    }
    parser = new CSVParser(instream,
            CSVParserFormatter.getFormat(delimiter, encapsulator, true, true));
    parserIterator = parser.iterator();
}
 
Example #5
Source File: TestFileInputFormat.java    From big-c with Apache License 2.0
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration conf = getConfiguration();
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  Job job = Job.getInstance(conf);
  TextInputFormat fileInputFormat = new TextInputFormat();
  List<InputSplit> splits = fileInputFormat.getSplits(job);
  String[] locations = splits.get(0).getLocations();
  Assert.assertEquals(2, locations.length);
  SplitLocationInfo[] locationInfo = splits.get(0).getLocationInfo();
  Assert.assertEquals(2, locationInfo.length);
  SplitLocationInfo localhostInfo = locations[0].equals("localhost") ?
      locationInfo[0] : locationInfo[1];
  SplitLocationInfo otherhostInfo = locations[0].equals("otherhost") ?
      locationInfo[0] : locationInfo[1];
  Assert.assertTrue(localhostInfo.isOnDisk());
  Assert.assertTrue(localhostInfo.isInMemory());
  Assert.assertTrue(otherhostInfo.isOnDisk());
  Assert.assertFalse(otherhostInfo.isInMemory());
}
 
Example #6
Source File: CSVFileInputFormat.java    From components with Apache License 2.0
@Override
public CSVFileRecordReader createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException {
  String delimiter = context.getConfiguration().get(TALEND_ROW_DELIMITED);
  String encoding = context.getConfiguration().get(TALEND_ENCODING);

  String textEnclosure = context.getConfiguration().get(TALEND_TEXT_ENCLOSURE);
  String escapeChar = context.getConfiguration().get(TALEND_ESCAPE);

  Character te = null;
  Character ec = null;

  if (textEnclosure != null && !textEnclosure.isEmpty()) {
    te = textEnclosure.charAt(0);
  }

  if (escapeChar != null && !escapeChar.isEmpty()) {
    ec = escapeChar.charAt(0);
  }

  return createRecordReader(delimiter, encoding, te, ec);
}
 
Example #7
Source File: HadoopElementIterator.java    From tinkerpop with Apache License 2.0
public HadoopElementIterator(final HadoopGraph graph) {
    try {
        this.graph = graph;
        final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
        final InputFormat<NullWritable, VertexWritable> inputFormat = ConfUtil.getReaderAsInputFormat(configuration);
        if (inputFormat instanceof FileInputFormat) {
            final Storage storage = FileSystemStorage.open(configuration);
            if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
                return; // there is no input location and thus, no data (empty graph)
            if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).isPresent())
                return; // there is no data at the input location (empty graph)
            configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
        }
        final List<InputSplit> splits = inputFormat.getSplits(new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
        for (final InputSplit split : splits) {
            this.readers.add(inputFormat.createRecordReader(split, new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
        }
    } catch (final Exception e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
 
Example #8
Source File: TestCRAMInputFormat.java    From Hadoop-BAM with MIT License
@Test
public void testReader() throws Exception {
  int expectedCount = 0;
  SamReader samReader = SamReaderFactory.makeDefault()
      .referenceSequence(new File(URI.create(reference))).open(new File(input));
  for (SAMRecord r : samReader) {
    expectedCount++;
  }

  AnySAMInputFormat inputFormat = new AnySAMInputFormat();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);
  assertEquals(1, splits.size());
  RecordReader<LongWritable, SAMRecordWritable> reader = inputFormat
      .createRecordReader(splits.get(0), taskAttemptContext);
  reader.initialize(splits.get(0), taskAttemptContext);

  int actualCount = 0;
  while (reader.nextKeyValue()) {
    actualCount++;
  }

  assertEquals(expectedCount, actualCount);
}
 
Example #9
Source File: BitcoinRawBlockFileInputFormat.java    From hadoopcryptoledger with Apache License 2.0
@Override
public RecordReader<BytesWritable,BytesWritable> createRecordReader(InputSplit split, TaskAttemptContext ctx)  throws IOException {
	/** Create reader **/
	try {
		return new BitcoinRawBlockRecordReader(ctx.getConfiguration());
	} catch (HadoopCryptoLedgerConfigurationException e) {
		// log
		LOG.error(e);
	}
	return null;
}
 
Example #10
Source File: MapReduceDataStore.java    From geowave with Apache License 2.0
public List<InputSplit> getSplits(
    CommonQueryOptions commonOptions,
    DataTypeQueryOptions<?> typeOptions,
    IndexQueryOptions indexOptions,
    QueryConstraints constraints,
    TransientAdapterStore adapterStore,
    AdapterIndexMappingStore aimStore,
    DataStatisticsStore statsStore,
    InternalAdapterStore internalAdapterStore,
    IndexStore indexStore,
    JobContext context,
    Integer minSplits,
    Integer maxSplits) throws IOException, InterruptedException;
 
Example #11
Source File: TabletSplitSplit.java    From datawave with Apache License 2.0
/**
 * Collect a set of hosts from all child InputSplits.
 *
 * @return the union of location hints reported by the child splits
 * @throws IOException if a child split fails to report its locations
 * @throws InterruptedException if the calling thread is interrupted
 */
public String[] getLocations() throws IOException, InterruptedException {
    HashSet<String> hosts = new HashSet<>();
    for (InputSplit s : splits) {
        String[] hints = s.getLocations();
        if (hints != null && hints.length > 0) {
            Collections.addAll(hosts, hints);
        }
    }
    return hosts.toArray(new String[hosts.size()]);
}
 
Example #12
Source File: BitcoinBlockFileInputFormat.java    From hadoopcryptoledger with Apache License 2.0
@Override
public RecordReader<BytesWritable,BitcoinBlock> createRecordReader(InputSplit split, TaskAttemptContext ctx) throws IOException {	
	/** Create reader **/
	try {
		return new BitcoinBlockRecordReader(ctx.getConfiguration());
	} catch (HadoopCryptoLedgerConfigurationException e) {
		// log
		LOG.error(e);
	}
	return null;
}
 
Example #13
Source File: DelimitedJSONReader.java    From marklogic-contentpump with Apache License 2.0
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    /* Initialization in super class */
    initConfig(context);  
    /*  Get file(s) in input split */
    setFile(((FileSplit) inSplit).getPath());
    // Initialize reader properties
    generateId = conf.getBoolean(CONF_INPUT_GENERATE_URI,false);
    if (generateId){
        idGen = new IdGenerator(file.toUri().getPath() + "-"
                + ((FileSplit) inSplit).getStart()); 
    } else {
        uriName = conf.get(CONF_INPUT_URI_ID, null);
        mapper = new ObjectMapper();
    }
    bytesRead = 0;
    totalBytes = inSplit.getLength();
    /* Check file status */
    fs = file.getFileSystem(context.getConfiguration());
    FileStatus status = fs.getFileStatus(file);
    if (status.isDirectory()) {
        iterator = new FileIterator((FileSplit)inSplit, context);
        inSplit = iterator.next();
    }
    /* Initialize buffered reader */
    initFileStream(inSplit);
}
 
Example #14
Source File: AbstractEthereumRecordReader.java    From hadoopcryptoledger with Apache License 2.0
/**
 * Initializes the reader.
 *
 * @param split Split to be used (assumed to be a file split)
 * @param context context of the job
 * @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
 * @throws java.lang.InterruptedException in case of thread interruption
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fSplit = (FileSplit) split;
    // Initialize start and end of split
    start = fSplit.getStart();
    end = start + fSplit.getLength();
    final Path file = fSplit.getPath();
    codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    final FileSystem fs = file.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fs.open(file);
    // open stream
    if (isCompressedInput()) { // decompress
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            ebr = new EthereumBlockReader(cIn, this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            ebr = new EthereumBlockReader(codec.createInputStream(fileIn, decompressor),
                    this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        ebr = new EthereumBlockReader(fileIn, this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
        filePosition = fileIn;
    }
}
 
Example #15
Source File: TeraGen.java    From pravega-samples with Apache License 2.0
/**
 * Create the desired number of splits, dividing the number of rows
 * between the mappers.
 */
public List<InputSplit> getSplits(JobContext job) {
  long totalRows = getNumberOfRows(job);
  int numSplits = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
  LOG.info("Generating " + totalRows + " using " + numSplits);
  List<InputSplit> splits = new ArrayList<InputSplit>();
  long currentRow = 0;
  for(int split = 0; split < numSplits; ++split) {
    long goal = 
      (long) Math.ceil(totalRows * (double)(split + 1) / numSplits);
    splits.add(new RangeInputSplit(currentRow, goal - currentRow));
    currentRow = goal;
  }
  return splits;
}
 
Example #16
Source File: MergeDictionaryJob.java    From kylin with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext jobContext) throws IOException, InterruptedException {
    int numMapTasks = jobContext.getConfiguration().getInt("num.map.tasks", 0);
    List<InputSplit> inputSplits = Lists.newArrayListWithCapacity(numMapTasks);

    for (int i = 0; i < numMapTasks; i++) {
        inputSplits.add(new IntInputSplit(i));
    }

    return inputSplits;
}
 
Example #17
Source File: TeraGen.java    From hadoop with Apache License 2.0
/**
 * Create the desired number of splits, dividing the number of rows
 * between the mappers.
 */
public List<InputSplit> getSplits(JobContext job) {
  long totalRows = getNumberOfRows(job);
  int numSplits = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
  LOG.info("Generating " + totalRows + " using " + numSplits);
  List<InputSplit> splits = new ArrayList<InputSplit>();
  long currentRow = 0;
  for(int split = 0; split < numSplits; ++split) {
    long goal = 
      (long) Math.ceil(totalRows * (double)(split + 1) / numSplits);
    splits.add(new RangeInputSplit(currentRow, goal - currentRow));
    currentRow = goal;
  }
  return splits;
}
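Examples #15 and #17 are the same TeraGen implementation from two projects. Unlike Example #1, the ceiling-based goal calculation spreads any remainder rows across the earlier splits instead of piling them all onto the last one. A quick standalone check with made-up numbers:

public class TeraGenSplitMathCheck {
  public static void main(String[] args) {
    long totalRows = 10;
    int numSplits = 3;
    long currentRow = 0;
    for (int split = 0; split < numSplits; ++split) {
      long goal = (long) Math.ceil(totalRows * (double) (split + 1) / numSplits);
      System.out.println("split " + split + ": " + (goal - currentRow) + " rows"); // 4, 3, 3
      currentRow = goal;
    }
  }
}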
 
Example #18
Source File: TestMRKeyValueTextInputFormat.java    From big-c with Apache License 2.0
/**
 * Test using the gzip codec for reading
 */
@Test
public void testGzip() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, conf);
  localFs.delete(workDir, true);
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip, 
            "line-1\tthe quick\nline-2\tbrown\nline-3\t" +
            "fox jumped\nline-4\tover\nline-5\t the lazy\nline-6\t dog\n");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
            "line-1\tthis is a test\nline-1\tof gzip\n");
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, workDir);
  KeyValueTextInputFormat format = new KeyValueTextInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  assertEquals("compressed splits == 2", 2, splits.size());
  FileSplit tmp = (FileSplit) splits.get(0);
  if (tmp.getPath().getName().equals("part2.txt.gz")) {
    splits.set(0, splits.get(1));
    splits.set(1, tmp);
  }
  List<Text> results = readSplit(format, splits.get(0), job);
  assertEquals("splits[0] length", 6, results.size());
  assertEquals("splits[0][0]", "the quick", results.get(0).toString());
  assertEquals("splits[0][1]", "brown", results.get(1).toString());
  assertEquals("splits[0][2]", "fox jumped", results.get(2).toString());
  assertEquals("splits[0][3]", "over", results.get(3).toString());
  assertEquals("splits[0][4]", " the lazy", results.get(4).toString());
  assertEquals("splits[0][5]", " dog", results.get(5).toString());
  results = readSplit(format, splits.get(1), job);
  assertEquals("splits[1] length", 2, results.size());
  assertEquals("splits[1][0]", "this is a test", 
               results.get(0).toString());    
  assertEquals("splits[1][1]", "of gzip", 
               results.get(1).toString());    
}
 
Example #19
Source File: PageRankAlgorithm.java    From rheem with Apache License 2.0
@Override
public VertexReader<LongWritable, DoubleWritable,
        FloatWritable> createVertexReader(InputSplit split,
                                          TaskAttemptContext context)
        throws IOException {
    return new PageRankVertexReader();
}
 
Example #20
Source File: AbstractEventRecordReader.java    From datawave with Apache License 2.0
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
    initializeEvent(context.getConfiguration());
    
    if (genericSplit instanceof FileSplit) {
        final Path p = ((FileSplit) genericSplit).getPath();
        final FileSystem sys = p.getFileSystem(context.getConfiguration());
        
        rawFileName = p.toString();
        rawFileTimeStamp = sys.getFileStatus(p).getModificationTime();
    }
}
 
Example #21
Source File: InputFormatHadoop.java    From grakn with GNU Affero General Public License v3.0
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    reader.initialize(inputSplit, taskAttemptContext);

    Configuration conf = taskAttemptContext.getConfiguration();
    if (conf.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null) {
        graphFilter = VertexProgramHelper.deserialize(ConfUtil.makeApacheConfiguration(conf),
                                                      Constants.GREMLIN_HADOOP_GRAPH_FILTER);
    }
}
 
Example #22
Source File: TeraInputFormat.java    From incubator-tez with Apache License 2.0
public void initialize(InputSplit split, TaskAttemptContext context) 
    throws IOException, InterruptedException {
  Path p = ((FileSplit)split).getPath();
  FileSystem fs = p.getFileSystem(context.getConfiguration());
  in = fs.open(p);
  long start = ((FileSplit)split).getStart();
  // find the offset to start at a record boundary
  offset = (RECORD_LENGTH - (start % RECORD_LENGTH)) % RECORD_LENGTH;
  in.seek(start + offset);
  length = ((FileSplit)split).getLength();
}
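The offset computation above advances the reader to the first record boundary at or after the split's start, since TeraSort data consists of fixed-length 100-byte records (RECORD_LENGTH). A standalone illustration of that modular arithmetic, with an invented start position:

public class RecordAlignmentDemo {
  public static void main(String[] args) {
    final long RECORD_LENGTH = 100;        // TeraSort record size
    long start = 12345;                    // split start landing mid-record
    long offset = (RECORD_LENGTH - (start % RECORD_LENGTH)) % RECORD_LENGTH;
    System.out.println("skip " + offset + " bytes, first record at " + (start + offset));
    // prints: skip 55 bytes, first record at 12400
  }
}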
 
Example #23
Source File: CombineShimRecordReader.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public void initialize(InputSplit curSplit, TaskAttemptContext curContext)
    throws IOException, InterruptedException {
  this.split = (CombineFileSplit) curSplit;
  this.context = curContext;

  if (null == rr) {
    createChildReader();
  }

  FileSplit fileSplit = new FileSplit(this.split.getPath(index),
      this.split.getOffset(index), this.split.getLength(index),
      this.split.getLocations());
  this.rr.initialize(fileSplit, this.context);
}
 
Example #24
Source File: PhoenixInputFormat.java    From phoenix with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {  
    final Configuration configuration = context.getConfiguration();
    final QueryPlan queryPlan = getQueryPlan(context,configuration);
    final List<KeyRange> allSplits = queryPlan.getSplits();
    final List<InputSplit> splits = generateSplits(queryPlan,allSplits);
    return splits;
}
 
Example #25
Source File: HadoopSortingTest.java    From ignite with Apache License 2.0
/** {@inheritDoc} */
@Override public List<InputSplit> getSplits(JobContext ctx) throws IOException, InterruptedException {
    List<InputSplit> res = new ArrayList<>();

    FakeSplit split = new FakeSplit(20);

    for (int i = 0; i < 10; i++)
        res.add(split);

    return res;
}
 
Example #26
Source File: NetezzaExternalTableInputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException,
    InterruptedException {
  int targetNumTasks = ConfigurationHelper.getJobNumMaps(context);
  List<InputSplit> splits = new ArrayList<InputSplit>(targetNumTasks);
  for (int i = 0; i < targetNumTasks; ++i) {
    splits.add(new NetezzaExternalTableInputSplit(i));
  }
  return splits;
}
 
Example #27
Source File: NMapInputFormat.java    From hbase with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext context) {
  int count = getNumMapTasks(context.getConfiguration());
  List<InputSplit> splits = new ArrayList<>(count);
  for (int i = 0; i < count; i++) {
    splits.add(new NullInputSplit());
  }
  return splits;
}
 
Example #28
Source File: AvroRecordReader.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration conf = context.getConfiguration();
  SeekableInput in = new FsInput(split.getPath(), conf);
  DatumReader<T> datumReader = new GenericDatumReader<T>();
  this.reader = DataFileReader.openReader(in, datumReader);
  reader.sync(split.getStart());                    // sync to start
  this.start = reader.tell();
  this.end = split.getStart() + split.getLength();
}
 
Example #29
Source File: MainframeDatasetInputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> splits = new ArrayList<InputSplit>();
  Configuration conf = job.getConfiguration();
  String dsName
      = conf.get(MainframeConfiguration.MAINFRAME_INPUT_DATASET_NAME);
  LOG.info("Datasets to transfer from: " + dsName);
  List<String> datasets = retrieveDatasets(dsName, conf);
  if (datasets.isEmpty()) {
    throw new IOException ("No sequential datasets retrieved from " + dsName);
  } else {
    int count = datasets.size();
    int chunks = Math.min(count, ConfigurationHelper.getJobNumMaps(job));
    for (int i = 0; i < chunks; i++) {
      splits.add(new MainframeDatasetInputSplit());
    }

    int j = 0;
    while(j < count) {
      for (InputSplit sp : splits) {
        if (j == count) {
          break;
        }
        ((MainframeDatasetInputSplit)sp).addDataset(datasets.get(j));
        j++;
      }
    }
  }
  return splits;
}
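The while loop above deals the retrieved dataset names out to the splits in round-robin order, so no split ends up with more than one extra dataset. The same distribution pattern, sketched with plain lists and invented dataset names:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class RoundRobinDemo {
  public static void main(String[] args) {
    List<String> datasets = Arrays.asList("DS.A", "DS.B", "DS.C", "DS.D", "DS.E");
    int chunks = Math.min(datasets.size(), 2);   // pretend the job runs two map tasks
    List<List<String>> splits = new ArrayList<>();
    for (int i = 0; i < chunks; i++) {
      splits.add(new ArrayList<>());
    }
    int j = 0;
    while (j < datasets.size()) {
      for (List<String> sp : splits) {
        if (j == datasets.size()) {
          break;
        }
        sp.add(datasets.get(j));
        j++;
      }
    }
    System.out.println(splits); // [[DS.A, DS.C, DS.E], [DS.B, DS.D]]
  }
}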
 
Example #30
Source File: MapContextImpl.java    From incubator-tez with Apache License 2.0
public MapContextImpl(Configuration conf, TaskAttemptID taskid,
                      RecordReader<KEYIN,VALUEIN> reader,
                      RecordWriter<KEYOUT,VALUEOUT> writer,
                      OutputCommitter committer,
                      TezTaskContext context,
                      InputSplit split, Reporter reporter) {
  super(conf, taskid, writer, committer, context, reporter);
  this.reader = reader;
  this.split = split;
}