Java Code Examples for org.apache.hadoop.mapred.FileInputFormat#getSplits()

The following examples show how to use org.apache.hadoop.mapred.FileInputFormat#getSplits(), drawn from open-source projects. The source file and license are noted above each example.
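Before the project examples, here is a minimal, self-contained sketch of the call itself. It uses TextInputFormat; the input path /tmp/input and the requested split count of 4 are illustrative assumptions, not taken from either project below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class GetSplitsSketch {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf();
    // Point the input format at a directory of input files (path is hypothetical)
    FileInputFormat.setInputPaths(conf, new Path("/tmp/input"));
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(conf); // old-API input formats are configured explicitly
    // Ask for roughly 4 splits; the framework treats numSplits as a hint, not a guarantee
    InputSplit[] splits = inputFormat.getSplits(conf, 4);
    for (InputSplit split : splits) {
      System.out.println(split);
    }
  }
}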
Example 1
Source File: CartesianCommentComparison.java    From hadoop-map-reduce-patterns with Apache License 2.0
private InputSplit[] getInputSplits(JobConf conf, String inputFormatClass,
		String inputPath, int numSplits) throws ClassNotFoundException, IOException {
	// Create a new instance of the configured input format via reflection
	FileInputFormat<?, ?> inputFormat = (FileInputFormat<?, ?>) ReflectionUtils.newInstance(
			Class.forName(inputFormatClass), conf);
	// Set the input path for this data set (setInputPaths is static on FileInputFormat)
	FileInputFormat.setInputPaths(conf, inputPath);
	// Compute and return the splits for this data set
	return inputFormat.getSplits(conf, numSplits);
}
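In the cartesian-product pattern this helper is typically invoked once per side of the join. A hypothetical call site, assuming conf and numSplits are in scope in the same class (the format class and paths are illustrative):

// Compute splits independently for each side of the cartesian product (paths are hypothetical)
InputSplit[] leftSplits = getInputSplits(conf, TextInputFormat.class.getName(), "/data/left", numSplits);
InputSplit[] rightSplits = getInputSplits(conf, TextInputFormat.class.getName(), "/data/right", numSplits);
// Pairing every left split with every right split yields
// leftSplits.length * rightSplits.length composite splits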
 
Example 2
Source File: OldApiHadoopFileInputSource.java    From incubator-gobblin with Apache License 2.0
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  JobConf jobConf = new JobConf(new Configuration());
  for (String key : state.getPropertyNames()) {
    jobConf.set(key, state.getProp(key));
  }

  if (state.contains(HadoopFileInputSource.FILE_INPUT_PATHS_KEY)) {
    for (String inputPath : state.getPropAsList(HadoopFileInputSource.FILE_INPUT_PATHS_KEY)) {
      FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    }
  }

  try {
    FileInputFormat<K, V> fileInputFormat = getFileInputFormat(state, jobConf);
    InputSplit[] fileSplits = fileInputFormat.getSplits(jobConf, state.getPropAsInt(
        HadoopFileInputSource.FILE_SPLITS_DESIRED_KEY, HadoopFileInputSource.DEFAULT_FILE_SPLITS_DESIRED));
    if (fileSplits == null || fileSplits.length == 0) {
      return ImmutableList.of();
    }

    Extract.TableType tableType = state.contains(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY) ?
        Extract.TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()) : null;
    String tableNamespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY);
    String tableName = state.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY);

    List<WorkUnit> workUnits = Lists.newArrayListWithCapacity(fileSplits.length);
    for (InputSplit inputSplit : fileSplits) {
      // Create one WorkUnit per InputSplit
      FileSplit fileSplit = (FileSplit) inputSplit;
      Extract extract = createExtract(tableType, tableNamespace, tableName);
      WorkUnit workUnit = WorkUnit.create(extract);
      workUnit.setProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY, HadoopUtils.serializeToString(fileSplit));
      workUnit.setProp(HadoopFileInputSource.FILE_SPLIT_PATH_KEY, fileSplit.getPath().toString());
      workUnits.add(workUnit);
    }

    return workUnits;
  } catch (IOException ioe) {
    throw new RuntimeException("Failed to get workunits", ioe);
  }
}
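Each WorkUnit above carries a serialized FileSplit that a task later turns back into records. A minimal sketch of consuming one such split with the old mapred API (the method and variable names are illustrative, not Gobblin's extractor code):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class ReadSplitSketch {
  static void readSplit(JobConf jobConf, FileSplit split) throws IOException {
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(jobConf);
    // A split produced by getSplits() is consumed through a RecordReader
    // obtained from the same input format
    RecordReader<LongWritable, Text> reader =
        inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
    LongWritable key = reader.createKey();
    Text value = reader.createValue();
    while (reader.next(key, value)) {
      // process one record of the split here
    }
    reader.close();
  }
}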