Java Code Examples for org.apache.hadoop.mapred.FileInputFormat#getSplits()

The following examples show how to use org.apache.hadoop.mapred.FileInputFormat#getSplits(), drawn from open-source projects. The source file and license are noted above each example.
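Before the project examples, here is a minimal, self-contained sketch of the call itself. It uses TextInputFormat; the input path /tmp/input and the requested split count of 4 are illustrative assumptions, not taken from either project below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class GetSplitsSketch {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf();
    // Point the input format at a directory of input files (path is hypothetical)
    FileInputFormat.setInputPaths(conf, new Path("/tmp/input"));
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(conf); // old-API input formats are configured explicitly
    // Ask for roughly 4 splits; the framework treats numSplits as a hint, not a guarantee
    InputSplit[] splits = inputFormat.getSplits(conf, 4);
    for (InputSplit split : splits) {
      System.out.println(split);
    }
  }
}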
Example 1
Source File: CartesianCommentComparison.java    From hadoop-map-reduce-patterns with Apache License 2.0
private InputSplit[] getInputSplits(JobConf conf, String inputFormatClass,
		String inputPath, int numSplits) throws ClassNotFoundException, IOException {
	// Create a new instance of the configured input format via reflection
	FileInputFormat<?, ?> inputFormat = (FileInputFormat<?, ?>) ReflectionUtils.newInstance(
			Class.forName(inputFormatClass), conf);
	// Set the input path for this data set (setInputPaths is static on FileInputFormat)
	FileInputFormat.setInputPaths(conf, inputPath);
	// Compute and return the splits for this data set
	return inputFormat.getSplits(conf, numSplits);
}
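In the cartesian-product pattern this helper is typically invoked once per side of the join. A hypothetical call site, assuming conf and numSplits are in scope in the same class (the format class and paths are illustrative):

// Compute splits independently for each side of the cartesian product (paths are hypothetical)
InputSplit[] leftSplits = getInputSplits(conf, TextInputFormat.class.getName(), "/data/left", numSplits);
InputSplit[] rightSplits = getInputSplits(conf, TextInputFormat.class.getName(), "/data/right", numSplits);
// Pairing every left split with every right split yields
// leftSplits.length * rightSplits.length composite splits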
 
Example 2
Source File: OldApiHadoopFileInputSource.java    From incubator-gobblin with Apache License 2.0
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  JobConf jobConf = new JobConf(new Configuration());
  for (String key : state.getPropertyNames()) {
    jobConf.set(key, state.getProp(key));
  }

  if (state.contains(HadoopFileInputSource.FILE_INPUT_PATHS_KEY)) {
    for (String inputPath : state.getPropAsList(HadoopFileInputSource.FILE_INPUT_PATHS_KEY)) {
      FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    }
  }

  try {
    FileInputFormat<K, V> fileInputFormat = getFileInputFormat(state, jobConf);
    InputSplit[] fileSplits = fileInputFormat.getSplits(jobConf, state.getPropAsInt(
        HadoopFileInputSource.FILE_SPLITS_DESIRED_KEY, HadoopFileInputSource.DEFAULT_FILE_SPLITS_DESIRED));
    if (fileSplits == null || fileSplits.length == 0) {
      return ImmutableList.of();
    }

    Extract.TableType tableType = state.contains(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY) ?
        Extract.TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()) : null;
    String tableNamespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY);
    String tableName = state.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY);

    List<WorkUnit> workUnits = Lists.newArrayListWithCapacity(fileSplits.length);
    for (InputSplit inputSplit : fileSplits) {
      // Create one WorkUnit per InputSplit
      FileSplit fileSplit = (FileSplit) inputSplit;
      Extract extract = createExtract(tableType, tableNamespace, tableName);
      WorkUnit workUnit = WorkUnit.create(extract);
      workUnit.setProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY, HadoopUtils.serializeToString(fileSplit));
      workUnit.setProp(HadoopFileInputSource.FILE_SPLIT_PATH_KEY, fileSplit.getPath().toString());
      workUnits.add(workUnit);
    }

    return workUnits;
  } catch (IOException ioe) {
    throw new RuntimeException("Failed to get workunits", ioe);
  }
}
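Each WorkUnit above carries a serialized FileSplit that a task later turns back into records. A minimal sketch of consuming one such split with the old mapred API (the method and variable names are illustrative, not Gobblin's extractor code):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class ReadSplitSketch {
  static void readSplit(JobConf jobConf, FileSplit split) throws IOException {
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(jobConf);
    // A split produced by getSplits() is consumed through a RecordReader
    // obtained from the same input format
    RecordReader<LongWritable, Text> reader =
        inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
    LongWritable key = reader.createKey();
    Text value = reader.createValue();
    while (reader.next(key, value)) {
      // process one record of the split here
    }
    reader.close();
  }
}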