Java Code Examples for org.apache.hadoop.mapreduce.lib.input.FileInputFormat#setMinInputSplitSize()

The following examples show how to use org.apache.hadoop.mapreduce.lib.input.FileInputFormat#setMinInputSplitSize(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hadoop   File: TeraValidate.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs the "TeraValidate" job.
 *
 * @param args expected to contain exactly two entries: the input path followed by the output path
 * @return 0 when the job completes successfully; 1 when the argument count is wrong or the job fails
 * @throws Exception propagated from job creation, configuration, or execution
 */
public int run(String[] args) throws Exception {
  // The job instance is obtained before the arguments are validated.
  final Job validateJob = Job.getInstance(getConf());

  final boolean hasExpectedArgCount = (args.length == 2);
  if (!hasExpectedArgCount) {
    usage();
    return 1;
  }

  // Input and output locations come straight from the command line.
  final Path inputDir = new Path(args[0]);
  TeraInputFormat.setInputPaths(validateJob, inputDir);
  final Path outputDir = new Path(args[1]);
  FileOutputFormat.setOutputPath(validateJob, outputDir);

  // Job identity.
  validateJob.setJarByClass(TeraValidate.class);
  validateJob.setJobName("TeraValidate");

  // Map and reduce stages, both emitting Text keys and Text values.
  validateJob.setMapperClass(ValidateMapper.class);
  validateJob.setReducerClass(ValidateReducer.class);
  validateJob.setOutputKeyClass(Text.class);
  validateJob.setOutputValueClass(Text.class);

  // force a single reducer
  validateJob.setNumReduceTasks(1);

  // force a single split
  validateJob.setInputFormatClass(TeraInputFormat.class);
  FileInputFormat.setMinInputSplitSize(validateJob, Long.MAX_VALUE);

  final boolean succeeded = validateJob.waitForCompletion(true);
  if (succeeded) {
    return 0;
  }
  return 1;
}
 
Example 2
Source Project: big-c   File: TeraValidate.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the "TeraValidate" job from the command-line arguments and waits for it to finish.
 *
 * @param args must hold exactly two elements: input path, then output path
 * @return 1 if the number of arguments is not two or the job does not succeed; 0 otherwise
 * @throws Exception propagated from job creation, configuration, or execution
 */
public int run(String[] args) throws Exception {
  // Note: the job is instantiated prior to checking the argument count.
  final Job teraValidateJob = Job.getInstance(getConf());

  if (args.length == 2) {
    final Path source = new Path(args[0]);
    TeraInputFormat.setInputPaths(teraValidateJob, source);
    final Path destination = new Path(args[1]);
    FileOutputFormat.setOutputPath(teraValidateJob, destination);

    teraValidateJob.setJobName("TeraValidate");
    teraValidateJob.setJarByClass(TeraValidate.class);

    // Mapper side.
    teraValidateJob.setMapperClass(ValidateMapper.class);

    // Reducer side; output keys and values are both Text.
    teraValidateJob.setReducerClass(ValidateReducer.class);
    teraValidateJob.setOutputKeyClass(Text.class);
    teraValidateJob.setOutputValueClass(Text.class);
    // force a single reducer
    teraValidateJob.setNumReduceTasks(1);

    // force a single split
    FileInputFormat.setMinInputSplitSize(teraValidateJob, Long.MAX_VALUE);
    teraValidateJob.setInputFormatClass(TeraInputFormat.class);

    final int exitCode = teraValidateJob.waitForCompletion(true) ? 0 : 1;
    return exitCode;
  }

  // Wrong number of arguments: print usage and report failure.
  usage();
  return 1;
}
 
Example 3
Source Project: incubator-tez   File: TeraValidate.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Entry point that builds the "TeraValidate" job, submits it, and blocks until completion.
 *
 * @param args two-element array: {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 on successful completion; 1 on bad argument count or job failure
 * @throws Exception propagated from job creation, configuration, or execution
 */
public int run(String[] args) throws Exception {
  // Created up front, before the argument count is inspected.
  final Job j = Job.getInstance(getConf());

  final int suppliedArgs = args.length;
  if (suppliedArgs != 2) {
    usage();
    return 1;
  }

  // Where to read from and where to write to.
  final Path in = new Path(args[0]);
  TeraInputFormat.setInputPaths(j, in);
  final Path out = new Path(args[1]);
  FileOutputFormat.setOutputPath(j, out);

  // Processing classes.
  j.setMapperClass(ValidateMapper.class);
  j.setReducerClass(ValidateReducer.class);

  // Output types.
  j.setOutputKeyClass(Text.class);
  j.setOutputValueClass(Text.class);

  // Naming and packaging.
  j.setJobName("TeraValidate");
  j.setJarByClass(TeraValidate.class);

  // force a single split
  FileInputFormat.setMinInputSplitSize(j, Long.MAX_VALUE);
  j.setInputFormatClass(TeraInputFormat.class);

  // force a single reducer
  j.setNumReduceTasks(1);

  final boolean ok = j.waitForCompletion(true);
  return ok ? 0 : 1;
}
 
Example 4
Source Project: ignite   File: HadoopPopularWords.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds and configures the Hadoop MapReduce job named "HadoopPopularWordExample".
 * <p>
 * When the {@code mapred.job.tracker} property is {@code local} (or is not set), the
 * minimum input split size is set to 32MB and the maximum to {@link Long#MAX_VALUE}.
 *
 * @return Configured Hadoop job instance.
 * @throws IOException If failed.
 */
@SuppressWarnings("deprecation")
private Job createConfigBasedHadoopJob() throws IOException {
    final Job popularWordsJob = new Job();
    final Configuration hadoopCfg = popularWordsJob.getConfiguration();

    // Use explicit configuration of distributed file system, if provided.
    hadoopCfg.addResource(U.resolveIgniteUrl(DFS_CFG));

    // Job identity.
    popularWordsJob.setJobName("HadoopPopularWordExample");
    popularWordsJob.setJarByClass(HadoopPopularWords.class);

    // Input side: text input, tokenizing mapper.
    popularWordsJob.setInputFormatClass(TextInputFormat.class);
    popularWordsJob.setMapperClass(TokenizingMapper.class);

    // Output side: top-N reducer with Text keys and IntWritable values.
    popularWordsJob.setReducerClass(TopNWordsReducer.class);
    popularWordsJob.setOutputKeyClass(Text.class);
    popularWordsJob.setOutputValueClass(IntWritable.class);

    // Data locations.
    FileInputFormat.setInputPaths(popularWordsJob, BOOKS_DFS_DIR);
    FileOutputFormat.setOutputPath(popularWordsJob, RESULT_DFS_DIR);

    final String jobTracker = hadoopCfg.get("mapred.job.tracker", "local");
    if (!"local".equals(jobTracker)) {
        return popularWordsJob;
    }

    // The local job tracker allows only one task per wave, but the text input format
    // replaces that with a value calculated from the input split size option,
    // so split the job into tasks using a 32MB split size.
    final long minSplitBytes = 32L * 1024 * 1024;
    FileInputFormat.setMinInputSplitSize(popularWordsJob, minSplitBytes);
    FileInputFormat.setMaxInputSplitSize(popularWordsJob, Long.MAX_VALUE);

    return popularWordsJob;
}
 
Example 5
/**
 * Computes the input splits for a job whose minimum and maximum input split sizes are
 * both set to the requested bundle size.
 *
 * @param desiredBundleSizeBytes value applied as both the minimum and the maximum split size, in bytes
 * @return the splits reported by the input format obtained from {@code createFormat}
 * @throws IOException propagated from job creation or split computation
 * @throws IllegalAccessException propagated from the helpers this method calls
 * @throws InstantiationException propagated from the helpers this method calls
 */
protected List<InputSplit> computeSplits(long desiredBundleSizeBytes) throws IOException, IllegalAccessException,
        InstantiationException {
    final Job splitJob = jobInstance();

    // Pin the upper and lower bounds to the same value.
    FileInputFormat.setMaxInputSplitSize(splitJob, desiredBundleSizeBytes);
    FileInputFormat.setMinInputSplitSize(splitJob, desiredBundleSizeBytes);

    final List<InputSplit> splits = createFormat(splitJob).getSplits(splitJob);
    return splits;
}