Java Code Examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat

The following are top voted examples for showing how to use org.apache.hadoop.mapreduce.lib.output.FileOutputFormat. These examples are extracted from open source projects. You can vote up the examples you like and your votes will be used in our system to generate more good examples.
Example 1
Project: mumu-parquet   File: MapReduceParquetMapReducer.java   View source code 8 votes vote down vote up
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = new Job(getConf(), "Text to Parquet");
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TextToParquetMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    AvroParquetOutputFormat.setSchema(job, SCHEMA);
    job.setOutputKeyClass(Void.class);
    job.setOutputValueClass(Group.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
 
Example 2
Project: LDA   File: CalParamDriver.java   View source code 7 votes vote down vote up
public static void run(Configuration conf, Path inputPath, Path output, double params) throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = "calculating parameter";
    conf.set("params",String.valueOf(params));

    Job job = new Job(conf, jobName);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(indexToCountWritable.class);
    job.setOutputKeyClass(twoDimensionIndexWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(CalParamsMapper.class);
    job.setReducerClass(CalParamsReducer.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job,output);

    job.setJarByClass(LDADriver.class);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("calculating parameter failed");
    }
}
 
Example 3
Project: Hadoop-Codes   File: MaxTempDriver.java   View source code 7 votes vote down vote up
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	Job job = Job.getInstance(conf, "maxtemp");
	
	job.setMapperClass(MaxTempMapper.class);
	job.setReducerClass(MaxTempReducer.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(FloatWritable.class);

	FileInputFormat.setInputPaths(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	if (!job.waitForCompletion(true))
		return;
}
 
Example 4
Project: ditb   File: IntegrationTestBigLinkedList.java   View source code 7 votes vote down vote up
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMuplitplier) throws Exception {
  LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
      + ", numNodes=" + numNodes);
  Job job = Job.getInstance(getConf());

  job.setJobName("Random Input Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  job.setInputFormatClass(GeneratorInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMuplitplier);

  job.setMapperClass(Mapper.class); //identity mapper

  FileOutputFormat.setOutputPath(job, tmpOutput);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}
 
Example 5
Project: ditb   File: IntegrationTestLoadAndVerify.java   View source code 7 votes vote down vote up
protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  LOG.info("Verify output dir: " + outputDir);

  Job job = Job.getInstance(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);

  Scan scan = new Scan();

  TableMapReduceUtil.initTableMapperJob(
      htd.getTableName().getNameAsString(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);

  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));

  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}
 
Example 6
Project: big-data-benchmark   File: HadoopWordCount.java   View source code 7 votes vote down vote up
public static void main(String[] args) throws Exception {
    BasicConfigurator.configure();
    Configuration conf = new Configuration();
    conf.setQuietMode(true);

    Job job = Job.getInstance(conf, "WordCount");
    job.setJarByClass(HadoopWordCount.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1] + "_" + System.currentTimeMillis()));

    long t = System.currentTimeMillis();
    job.waitForCompletion(true);

    System.out.println("TotalTime=" + (System.currentTimeMillis() - t));
}
 
Example 7
Project: learn-to-hadoop   File: MaxTemperatureWithCombiner.java   View source code 7 votes vote down vote up
public static void main(String[] args) throws Exception {
    if(args.length != 2){
        System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
        System.exit(-1);
    }

    Job job = new Job();
    job.setJarByClass(MaxTemperatureWithCombiner.class);
    job.setJobName("Max Temperature With Combiner");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example 8
Project: hadoop   File: MapReduceTestUtil.java   View source code 7 votes vote down vote up
/**
 * Creates a simple copy job.
 * 
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, 
    Path... indirs) throws Exception {
  conf.setInt(MRJobConfig.NUM_MAPS, 3);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(DataCopyMapper.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopyReducer.class);
  theJob.setNumReduceTasks(1);
  return theJob;
}
 
Example 9
Project: hadoop   File: MapReduceTestUtil.java   View source code 7 votes vote down vote up
/**
 * Creates a simple fail job.
 * 
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple fail job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createFailJob(Configuration conf, Path outdir, 
    Path... indirs) throws Exception {
  FileSystem fs = outdir.getFileSystem(conf);
  if (fs.exists(outdir)) {
    fs.delete(outdir, true);
  }
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("Fail-Job");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(FailMapper.class);
  theJob.setReducerClass(Reducer.class);
  theJob.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  return theJob;
}
 
Example 10
Project: ditb   File: IntegrationTestLoadAndVerify.java   View source code 7 votes vote down vote up
protected Job doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "load-output");
  LOG.info("Load output dir: " + outputDir);

  NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
  conf.set(TABLE_NAME_KEY, htd.getTableName().getNameAsString());

  Job job = Job.getInstance(conf);
  job.setJobName(TEST_NAME + " Load for " + htd.getTableName());
  job.setJarByClass(this.getClass());
  setMapperClass(job);
  job.setInputFormatClass(NMapInputFormat.class);
  job.setNumReduceTasks(0);
  setJobScannerConf(job);
  FileOutputFormat.setOutputPath(job, outputDir);

  TableMapReduceUtil.addDependencyJars(job);

  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);
  assertTrue(job.waitForCompletion(true));
  return job;
}
 
Example 11
Project: hadoop   File: MapReduceTestUtil.java   View source code 7 votes vote down vote up
public static Job createJob(Configuration conf, Path inDir, Path outDir, 
    int numInputFiles, int numReds, String input) throws IOException {
  Job job = Job.getInstance(conf);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (fs.exists(inDir)) {
    fs.delete(inDir, true);
  }
  fs.mkdirs(inDir);
  for (int i = 0; i < numInputFiles; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }    

  FileInputFormat.setInputPaths(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);
  job.setNumReduceTasks(numReds);
  return job;
}
 
Example 12
Project: mapreduce-samples   File: UnitSum.java   View source code 7 votes vote down vote up
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.setFloat("beta", Float.parseFloat(args[3]));
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitSum.class);

        ChainMapper.addMapper(job, PassMapper.class, Object.class, Text.class, Text.class, DoubleWritable.class, conf);
        ChainMapper.addMapper(job, BetaMapper.class, Text.class, DoubleWritable.class, Text.class, DoubleWritable.class, conf);

        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.waitForCompletion(true);
    }
 
Example 13
Project: ditb   File: IntegrationTestWithCellVisibilityLoadAndVerify.java   View source code 7 votes vote down vote up
private Job doVerify(Configuration conf, HTableDescriptor htd, String... auths)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  scan.setAuthorizations(new Authorizations(auths));
  TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan,
      VerifyMapper.class, NullWritable.class, NullWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  return job;
}
 
Example 14
Project: hadoop   File: TestJoinDatamerge.java   View source code 7 votes vote down vote up
public void testEmptyJoin() throws Exception {
  Configuration conf = new Configuration();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  Path[] src = { new Path(base,"i0"), new Path("i1"), new Path("i2") };
  conf.set(CompositeInputFormat.JOIN_EXPR, CompositeInputFormat.compose("outer",
      MapReduceTestUtil.Fake_IF.class, src));
  MapReduceTestUtil.Fake_IF.setKeyClass(conf, 
    MapReduceTestUtil.IncomparableKey.class);
  Job job = Job.getInstance(conf);
  job.setInputFormatClass(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  job.setOutputKeyClass(MapReduceTestUtil.IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);

  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());
  base.getFileSystem(conf).delete(base, true);
}
 
Example 15
Project: hadoop   File: TestLocalRunner.java   View source code 7 votes vote down vote up
/**
 * Run a test with a misconfigured number of mappers.
 * Expect failure.
 */
@Test
public void testInvalidMultiMapParallelism() throws Exception {
  Job job = Job.getInstance();

  Path inputPath = createMultiMapsInput();
  Path outputPath = getOutputPath();

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);

  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  job.setMapperClass(StressMapper.class);
  job.setReducerClass(CountingReducer.class);
  job.setNumReduceTasks(1);
  LocalJobRunner.setLocalMaxRunningMaps(job, -6);
  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  boolean success = job.waitForCompletion(true);
  assertFalse("Job succeeded somehow", success);
}
 
Example 16
Project: hadoop   File: TestLocalRunner.java   View source code 7 votes vote down vote up
/** Test case for zero mappers */
@Test
public void testEmptyMaps() throws Exception {
  Job job = Job.getInstance();
  Path outputPath = getOutputPath();

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);

  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  job.setInputFormatClass(EmptyInputFormat.class);
  job.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(job, outputPath);

  boolean success = job.waitForCompletion(true);
  assertTrue("Empty job should work", success);
}
 
Example 17
Project: DocIT   File: Cut.java   View source code 6 votes vote down vote up
public static void cut(String name, String in) throws Exception {
	Job job = createJob(name, in);
	job.setNumReduceTasks(0);
	boolean success = job.waitForCompletion(true);

	if (success) {
		// MapReduce reads an input and writes the result to an new location.
		// Hence it can not modify data in place, and we have to replace the input
		// with the output
		Path output = FileOutputFormat.getOutputPath(job);
		FileSystem fs = output.getFileSystem(job.getConfiguration());
		Path[] inputs = FileInputFormat.getInputPaths(job);
		for (int i = 0; i < inputs.length; i++) {
			fs.delete(inputs[i], true);
		}
		fs.rename(output, inputs[0]);
	}
}
 
Example 18
Project: Wikipedia-Index   File: TF.java   View source code 6 votes vote down vote up
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	conf.set("xmlinput.start", "<page>");
	conf.set("xmlinput.end", "</page>");
	
	Job job =Job.getInstance(conf);
	job.setJobName("TermFrequencyCount");
	job.setJarByClass(TF.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(IntArrayWritable.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);
	
	job.setMapperClass(TFMap.class);
	job.setReducerClass(TFReduce.class);
	
	job.setInputFormatClass(XmlInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Example 19
Project: ditb   File: CellCounter.java   View source code 6 votes vote down vote up
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
    throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  String reportSeparatorString = (args.length > 2) ? args[2]: ":";
  conf.set("ReportSeparator", reportSeparatorString);
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(CellCounter.class);
  Scan scan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(tableName, scan,
      CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setReducerClass(IntSumReducer.class);
  return job;
}
 
Example 20
Project: aliyun-maxcompute-data-collectors   File: HBaseBulkImportJob.java   View source code 6 votes vote down vote up
@Override
protected void jobSetup(Job job) throws IOException, ImportException {
  super.jobSetup(job);

  // we shouldn't have gotten here if bulk load dir is not set
  // so let's throw a ImportException
  if(getContext().getDestination() == null){
    throw new ImportException("Can't run HBaseBulkImportJob without a " +
        "valid destination directory.");
  }

  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), Preconditions.class);
  FileOutputFormat.setOutputPath(job, getContext().getDestination());
  HTable hTable = new HTable(job.getConfiguration(), options.getHBaseTable());
  HFileOutputFormat.configureIncrementalLoad(job, hTable);
}
 
Example 21
Project: Deep_learning_using_Java   File: Recommendation_program.java   View source code 6 votes vote down vote up
private Job jobListFriends(String inputPath, String outputPath) throws IOException, InterruptedException, ClassNotFoundException{      
    Job job = new Job();
    job.setJarByClass(WordCount.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(KeyValueTextInputFormat.class);   // Need to change the import
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.waitForCompletion(true);

    return job;
}
 
Example 22
Project: hadoop   File: RandomTextWriterJob.java   View source code 6 votes vote down vote up
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    return printUsage();    
  }
  Job job = createJob(getConf());
  FileOutputFormat.setOutputPath(job, new Path(args[0]));
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}
 
Example 23
Project: mapreduce-samples   File: UnitMultiplication.java   View source code 6 votes vote down vote up
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.setFloat("beta", Float.parseFloat(args[3]));
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitMultiplication.class);

        ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
        ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf);

        job.setReducerClass(MultiplicationReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.waitForCompletion(true);
    }
 
Example 24
Project: hadoop   File: WordStandardDeviation.java   View source code 6 votes vote down vote up
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordstddev <in> <out>");
    return 0;
  }

  Configuration conf = getConf();

  Job job = Job.getInstance(conf, "word stddev");
  job.setJarByClass(WordStandardDeviation.class);
  job.setMapperClass(WordStandardDeviationMapper.class);
  job.setCombinerClass(WordStandardDeviationReducer.class);
  job.setReducerClass(WordStandardDeviationReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputpath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputpath);
  boolean result = job.waitForCompletion(true);

  // read output and calculate standard deviation
  stddev = readAndCalcStdDev(outputpath, conf);

  return (result ? 0 : 1);
}
 
Example 25
Project: hadoop   File: WordCount.java   View source code 6 votes vote down vote up
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: wordcount <in> [<in>...] <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }
  FileOutputFormat.setOutputPath(job,
    new Path(otherArgs[otherArgs.length - 1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example 26
Project: hadoop   File: WordMean.java   View source code 6 votes vote down vote up
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordmean <in> <out>");
    return 0;
  }

  Configuration conf = getConf();

  Job job = Job.getInstance(conf, "word mean");
  job.setJarByClass(WordMean.class);
  job.setMapperClass(WordMeanMapper.class);
  job.setCombinerClass(WordMeanReducer.class);
  job.setReducerClass(WordMeanReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputpath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputpath);
  boolean result = job.waitForCompletion(true);
  mean = readAndCalcMean(outputpath, conf);

  return (result ? 0 : 1);
}
 
Example 27
Project: mapreduce-samples   File: UnitMultiplication.java   View source code 6 votes vote down vote up
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitMultiplication.class);

        ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
        ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf);

        job.setReducerClass(MultiplicationReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.waitForCompletion(true);
    }
 
Example 28
Project: hadoop   File: TeraChecksum.java   View source code 6 votes vote down vote up
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 2;
  }
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSum");
  job.setJarByClass(TeraChecksum.class);
  job.setMapperClass(ChecksumMapper.class);
  job.setReducerClass(ChecksumReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Unsigned16.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  job.setInputFormatClass(TeraInputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
 
Example 29
Project: hadoop   File: TeraValidate.java   View source code 6 votes vote down vote up
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 1;
  }
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split 
  FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
  job.setInputFormatClass(TeraInputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
 
Example 30
Project: hadoop   File: CompressionEmulationUtil.java   View source code 6 votes vote down vote up
/**
 * Configure the {@link Job} for enabling compression emulation.
 */
static void configure(final Job job) throws IOException, InterruptedException,
                                            ClassNotFoundException {
  // set the random text mapper
  job.setMapperClass(RandomTextDataMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setInputFormatClass(GenDataFormat.class);
  job.setJarByClass(GenerateData.class);

  // set the output compression true
  FileOutputFormat.setCompressOutput(job, true);
  try {
    FileInputFormat.addInputPath(job, new Path("ignored"));
  } catch (IOException e) {
    LOG.error("Error while adding input path ", e);
  }
}
 
Example 31
Project: hadoop   File: LoadJob.java   View source code 6 votes vote down vote up
public Job call() throws IOException, InterruptedException,
                         ClassNotFoundException {
  ugi.doAs(
    new PrivilegedExceptionAction<Job>() {
      public Job run() throws IOException, ClassNotFoundException,
                              InterruptedException {
        job.setMapperClass(LoadMapper.class);
        job.setReducerClass(LoadReducer.class);
        job.setNumReduceTasks(jobdesc.getNumberReduces());
        job.setMapOutputKeyClass(GridmixKey.class);
        job.setMapOutputValueClass(GridmixRecord.class);
        job.setSortComparatorClass(LoadSortComparator.class);
        job.setGroupingComparatorClass(SpecGroupingComparator.class);
        job.setInputFormatClass(LoadInputFormat.class);
        job.setOutputFormatClass(RawBytesOutputFormat.class);
        job.setPartitionerClass(DraftPartitioner.class);
        job.setJarByClass(LoadJob.class);
        job.getConfiguration().setBoolean(Job.USED_GENERIC_PARSER, true);
        FileOutputFormat.setOutputPath(job, outdir);
        job.submit();
        return job;
      }
    });

  return job;
}
 
Example 32
Project: hadoop   File: TestCompressionEmulationUtils.java   View source code 6 votes vote down vote up
/**
 * Runs a GridMix data-generation job.
 */
private static void runDataGenJob(Configuration conf, Path tempDir) 
throws IOException, ClassNotFoundException, InterruptedException {
  JobClient client = new JobClient(conf);
  
  // get the local job runner
  conf.setInt(MRJobConfig.NUM_MAPS, 1);
  
  Job job = Job.getInstance(conf);
  
  CompressionEmulationUtil.configure(job);
  job.setInputFormatClass(CustomInputFormat.class);
  
  // set the output path
  FileOutputFormat.setOutputPath(job, tempDir);
  
  // submit and wait for completion
  job.submit();
  int ret = job.waitForCompletion(true) ? 0 : 1;

  assertEquals("Job Failed", 0, ret);
}
 
Example 33
Project: Machine-Learning-End-to-Endguide-for-Java-developers   File: AveragePageCount.java   View source code 5 votes vote down vote up
public static void main(String[] args) throws Exception {
    Configuration con = new Configuration();
    Job bookJob = Job.getInstance(con, "Average Page Count");
    bookJob.setJarByClass(AveragePageCount.class);
    bookJob.setMapperClass(TextMapper.class);
    bookJob.setReducerClass(AverageReduce.class);
    bookJob.setOutputKeyClass(Text.class);
    bookJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(bookJob, new Path("C:/Hadoop/books.txt"));
    FileOutputFormat.setOutputPath(bookJob, new Path("C:/Hadoop/BookOutput"));
    if (bookJob.waitForCompletion(true)) {
        System.exit(0);
    }
}
 
Example 34
Project: ditb   File: TestTableInputFormatScanBase.java   View source code 5 votes vote down vote up
/**
 * Tests a MR scan using specific start and stop rows.
 *
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
protected void testScan(String start, String stop, String last)
throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
    "To" + (stop != null ? stop.toUpperCase() : "Empty");
  LOG.info("Before map/reduce startup - job " + jobName);
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILY);
  if (start != null) {
    scan.setStartRow(Bytes.toBytes(start));
  }
  c.set(KEY_STARTROW, start != null ? start : "");
  if (stop != null) {
    scan.setStopRow(Bytes.toBytes(stop));
  }
  c.set(KEY_LASTROW, last != null ? last : "");
  LOG.info("scan before: " + scan);
  Job job = new Job(c, jobName);
  TableMapReduceUtil.initTableMapperJob(
    Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
    ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  job.setReducerClass(ScanReducer.class);
  job.setNumReduceTasks(1); // one to get final "first" and "last" key
  FileOutputFormat.setOutputPath(job,
      new Path(TEST_UTIL.getDataTestDir(), job.getJobName()));
  LOG.info("Started " + job.getJobName());
  assertTrue(job.waitForCompletion(true));
  LOG.info("After map/reduce completion - job " + jobName);
}
 
Example 35
Project: Java-Data-Science-Made-Easy   File: AveragePageCount.java   View source code 5 votes vote down vote up
public static void main(String[] args) throws Exception {
    Configuration con = new Configuration();
    Job bookJob = Job.getInstance(con, "Average Page Count");
    bookJob.setJarByClass(AveragePageCount.class);
    bookJob.setMapperClass(TextMapper.class);
    bookJob.setReducerClass(AverageReduce.class);
    bookJob.setOutputKeyClass(Text.class);
    bookJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(bookJob, new Path("C:/Hadoop/books.txt"));
    FileOutputFormat.setOutputPath(bookJob, new Path("C:/Hadoop/BookOutput"));
    if (bookJob.waitForCompletion(true)) {
        System.exit(0);
    }
}
 
Example 36
Project: Java-for-Data-Science   File: AveragePageCount.java   View source code 5 votes vote down vote up
public static void main(String[] args) throws Exception {
    Configuration con = new Configuration();
    Job bookJob = Job.getInstance(con, "Average Page Count");
    bookJob.setJarByClass(AveragePageCount.class);
    bookJob.setMapperClass(TextMapper.class);
    bookJob.setReducerClass(AverageReduce.class);
    bookJob.setOutputKeyClass(Text.class);
    bookJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(bookJob, new Path("C:/Hadoop/books.txt"));
    FileOutputFormat.setOutputPath(bookJob, new Path("C:/Hadoop/BookOutput"));
    if (bookJob.waitForCompletion(true)) {
        System.exit(0);
    }
}
 
Example 37
Project: mapreduce-samples   File: UnitSum.java   View source code 5 votes vote down vote up
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitSum.class);
        job.setMapperClass(PassMapper.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    }
 
Example 38
Project: mapreduce-samples   File: UnitSum.java   View source code 5 votes vote down vote up
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitSum.class);
        job.setMapperClass(PassMapper.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    }
 
Example 39
Project: Wikipedia-Index   File: PageWordCount.java   View source code 5 votes vote down vote up
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	conf.set("xmlinput.start", "<page>");
	conf.set("xmlinput.end", "</page>");
			
	Job job =Job.getInstance(conf);
	job.setJobName("PageWordCount");
	job.setJarByClass(PageWordCount.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(IntWritable.class);
	
	job.setMapperClass(PageWordCountMap.class);
	job.setCombinerClass(PageWordCountReduce.class);
	job.setReducerClass(PageWordCountReduce.class);
	
	job.setInputFormatClass(XmlInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));
	job.waitForCompletion(true);
}
 
Example 40
Project: ditb   File: HashTable.java   View source code 5 votes vote down vote up
public Job createSubmittableJob(String[] args) throws IOException {
  Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
  generatePartitions(partitionsPath);
  
  Job job = Job.getInstance(getConf(),
        getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
  Configuration jobConf = job.getConfiguration();
  jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
  job.setJarByClass(HashTable.class);

  TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
      HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  
  // use a TotalOrderPartitioner and reducers to group region output into hash files
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
  job.setReducerClass(Reducer.class);  // identity reducer
  job.setNumReduceTasks(tableHash.numHashFiles);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(ImmutableBytesWritable.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));
  
  return job;
}