org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer. The examples are taken from open source projects; you can go to the original project or source file by following the link above each example.
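LongSumReducer<KEY> is a small generic reducer, Reducer<KEY, LongWritable, KEY, LongWritable>, that writes out the sum of all LongWritable values grouped under each key. Because long addition is associative and commutative, the same class can be plugged in as both the combiner and the reducer, which several of the examples below do. As a quick orientation before the real-world examples, here is a minimal word-count driver built around it; the MinimalWordCount and TokenMapper names and the path arguments are illustrative, not taken from any of the projects below.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;

public class MinimalWordCount {

  // Emits (word, 1) for every token in the input line
  public static class TokenMapper extends Mapper<Object, Text, Text, LongWritable> {
    private static final LongWritable ONE = new LongWritable(1);
    private final Text word = new Text();

    @Override
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, ONE);
      }
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "minimal word count");
    job.setJarByClass(MinimalWordCount.class);
    job.setMapperClass(TokenMapper.class);
    // LongSumReducer works both as the combiner and as the reducer:
    // it simply sums the LongWritable values per key
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}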
Example #1
Source File: PerformanceEvaluation.java    From hbase with Apache License 2.0
/**
 * Run a mapreduce job.  Run as many maps as asked-for clients.
 * Before we start the job, write out an input file with an instruction
 * per client specifying which row it is to start on.
 * @param cmd Command to run.
 */
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
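  // LongSumReducer sums the LongWritable values emitted by the map tasks per key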
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
 
Example #2
Source File: WATServerType.java    From cc-warc-examples with MIT License
/**
 * Builds and runs the Hadoop job.
 * @return	0 if the Hadoop job completes successfully and 1 otherwise.
 */
@Override
public int run(String[] arg0) throws Exception {
	Configuration conf = getConf();
	Job job = Job.getInstance(conf);
	job.setJarByClass(WATServerType.class);
	job.setNumReduceTasks(1);
	
	String inputPath = "data/*.warc.wat.gz";
	//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
	//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
	LOG.info("Input path: " + inputPath);
	FileInputFormat.addInputPath(job, new Path(inputPath));
	
	String outputPath = "/tmp/cc/";
	FileSystem fs = FileSystem.newInstance(conf);
	if (fs.exists(new Path(outputPath))) {
		fs.delete(new Path(outputPath), true);
	}
	FileOutputFormat.setOutputPath(job, new Path(outputPath));
	
	job.setInputFormatClass(WARCFileInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    
    job.setMapperClass(ServerTypeMap.ServerMapper.class);
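    // LongSumReducer totals the per-key counts emitted by ServerMapper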
    job.setReducerClass(LongSumReducer.class);
	
    return job.waitForCompletion(true) ? 0 : 1;
}
 
Example #3
Source File: WETWordCount.java    From cc-warc-examples with MIT License
/**
 * Builds and runs the Hadoop job.
 * @return	0 if the Hadoop job completes successfully and 1 otherwise.
 */
@Override
public int run(String[] arg0) throws Exception {
	Configuration conf = getConf();
	Job job = Job.getInstance(conf);
	job.setJarByClass(WETWordCount.class);
	job.setNumReduceTasks(1);
	
	String inputPath = "data/*.warc.wet.gz";
	//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
	//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
	LOG.info("Input path: " + inputPath);
	FileInputFormat.addInputPath(job, new Path(inputPath));
	
	String outputPath = "/tmp/cc/";
	FileSystem fs = FileSystem.newInstance(conf);
	if (fs.exists(new Path(outputPath))) {
		fs.delete(new Path(outputPath), true);
	}
	FileOutputFormat.setOutputPath(job, new Path(outputPath));
	
	job.setInputFormatClass(WARCFileInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    
    job.setMapperClass(WordCounterMap.WordCountMapper.class);
    // LongSumReducer totals the per-word counts emitted by the mapper
    job.setReducerClass(LongSumReducer.class);
	
    return job.waitForCompletion(true) ? 0 : 1;
}
 
Example #4
Source File: WARCTagCounter.java    From cc-warc-examples with MIT License
/**
 * Builds and runs the Hadoop job.
 * @return	0 if the Hadoop job completes successfully and -1 otherwise.
 */
@Override
public int run(String[] arg0) throws Exception {
	Configuration conf = getConf();
	Job job = Job.getInstance(conf);
	job.setJarByClass(WARCTagCounter.class);
	job.setNumReduceTasks(1);
	
	String inputPath = "data/*.warc.gz";
	//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
	//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
	LOG.info("Input path: " + inputPath);
	FileInputFormat.addInputPath(job, new Path(inputPath));
	
	String outputPath = "/tmp/cc/";
	FileSystem fs = FileSystem.newInstance(conf);
	if (fs.exists(new Path(outputPath))) {
		fs.delete(new Path(outputPath), true);
	}
	FileOutputFormat.setOutputPath(job, new Path(outputPath));

	job.setInputFormatClass(WARCFileInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);

	job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    
    job.setMapperClass(TagCounterMap.TagCounterMapper.class);
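    // LongSumReducer totals the tag counts emitted by TagCounterMapper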
    job.setReducerClass(LongSumReducer.class);

    return job.waitForCompletion(true) ? 0 : -1;
}
 
Example #5
Source File: Wordcount.java    From logparser with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args)
            .getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        return 2;
    }

    conf.set("nl.basjes.parse.apachehttpdlogline.format", logFormat);

    // A ',' separated list of fields
    conf.set("nl.basjes.parse.apachehttpdlogline.fields",
            "STRING:request.status.last");

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(Wordcount.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));

    job.setInputFormatClass(ApacheHttpdLogfileInputFormat.class);
    job.setMapperClass(TokenizerMapper.class);
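    // Long addition is associative and commutative, so LongSumReducer
    // can safely serve as both the combiner and the reducer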
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    // The configuration should contain a reference to your NameNode
    FileSystem fs = FileSystem.get(conf);
    // Recursively delete the output directory if it already exists
    Path outputPath = new Path(otherArgs[1]);
    fs.delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
 
Example #6
Source File: DBCountPageView.java    From hadoop with Apache License 2.0
@Override
// Usage: DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {
  
  String driverClassName = DRIVER_CLASS;
  String url = DB_URL;
  
  if(args.length > 1) {
    driverClassName = args[0];
    url = args[1];
  }
  
  initialize(driverClassName, url);
  Configuration conf = getConf();

  DBConfiguration.configureDB(conf, driverClassName, url);

  Job job = Job.getInstance(conf);
      
  job.setJobName("Count Pageviews of URLs");
  job.setJarByClass(DBCountPageView.class);
  job.setMapperClass(PageviewMapper.class);
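  // LongSumReducer pre-aggregates partial pageview counts before the shuffle;
  // PageviewReducer performs the final aggregation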
  job.setCombinerClass(LongSumReducer.class);
  job.setReducerClass(PageviewReducer.class);

  DBInputFormat.setInput(job, AccessRecord.class, "Access"
      , null, "url", AccessFieldNames);

  DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);
  
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(LongWritable.class);

  job.setOutputKeyClass(PageviewRecord.class);
  job.setOutputValueClass(NullWritable.class);
  int ret;
  try {
    ret = job.waitForCompletion(true) ? 0 : 1;
    boolean correct = verify();
    if(!correct) {
      throw new RuntimeException("Evaluation was not correct!");
    }
  } finally {
    shutdown();    
  }
  return ret;
}
 
Example #7
Source File: Grep.java    From hadoop with Apache License 2.0
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4)
    conf.set(RegexMapper.GROUP, args[3]);

  Job grepJob = Job.getInstance(conf);
  
  try {
    
    grepJob.setJobName("grep-search");
    grepJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

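    // Summing match counts with LongSumReducer as both combiner and reducer
    // cuts shuffle volume without changing the result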
    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    grepJob.waitForCompletion(true);

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");
    sortJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    sortJob.waitForCompletion(true);
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
  return 0;
}
 
Example #8
Source File: BasicJobChaining.java    From hadoop-map-reduce-patterns with Apache License 2.0
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

	if (otherArgs.length != 3) {
		System.err.println("Usage: JobChainingDriver <posts> <users> <out>");
		System.exit(2);
	}

	Path postInput = new Path(otherArgs[0]);
	Path userInput = new Path(otherArgs[1]);
	Path outputDirIntermediate = new Path(otherArgs[2] + "_int");
	Path outputDir = new Path(otherArgs[2]);

	// Set up the first job to count user posts
	Job countingJob = Job.getInstance(conf, "JobChaining-Counting");
	countingJob.setJarByClass(BasicJobChaining.class);

	// Set our mapper and reducer; we can use the API's LongSumReducer as
	// the combiner!
	countingJob.setMapperClass(UserIdCountMapper.class);
	countingJob.setCombinerClass(LongSumReducer.class);
	countingJob.setReducerClass(UserIdSumReducer.class);

	countingJob.setOutputKeyClass(Text.class);
	countingJob.setOutputValueClass(LongWritable.class);

	countingJob.setInputFormatClass(TextInputFormat.class);

	TextInputFormat.addInputPath(countingJob, postInput);

	countingJob.setOutputFormatClass(TextOutputFormat.class);
	TextOutputFormat.setOutputPath(countingJob, outputDirIntermediate);

	// Execute job and grab exit code
	int code = countingJob.waitForCompletion(true) ? 0 : 1;

	if (code == 0) {
		// Calculate the average posts per user by getting counter values
		double numRecords = (double) countingJob.getCounters()
				.findCounter(AVERAGE_CALC_GROUP, UserIdCountMapper.RECORDS_COUNTER_NAME)
				.getValue();
		double numUsers = (double) countingJob.getCounters()
				.findCounter(AVERAGE_CALC_GROUP, UserIdSumReducer.USERS_COUNTER_NAME)
				.getValue();

		double averagePostsPerUser = numRecords / numUsers;

		// Setup binning job
		Job binningJob = Job.getInstance(new Configuration(), "JobChaining-Binning");
		binningJob.setJarByClass(BasicJobChaining.class);

		// Set mapper and the average posts per user
		binningJob.setMapperClass(UserIdBinningMapper.class);
		UserIdBinningMapper.setAveragePostsPerUser(binningJob, averagePostsPerUser);

		binningJob.setNumReduceTasks(0);

		binningJob.setInputFormatClass(TextInputFormat.class);
		TextInputFormat.addInputPath(binningJob, outputDirIntermediate);

		// Add two named outputs for below/above average
		MultipleOutputs.addNamedOutput(binningJob, MULTIPLE_OUTPUTS_BELOW_NAME,
				TextOutputFormat.class, Text.class, Text.class);

		MultipleOutputs.addNamedOutput(binningJob, MULTIPLE_OUTPUTS_ABOVE_NAME,
				TextOutputFormat.class, Text.class, Text.class);
		MultipleOutputs.setCountersEnabled(binningJob, true);

		TextOutputFormat.setOutputPath(binningJob, outputDir);

		// Add the user files to the distributed cache of the binning job
		FileStatus[] userFiles = FileSystem.get(conf).listStatus(userInput);
		for (FileStatus status : userFiles) {
			binningJob.addCacheFile(status.getPath().toUri());
		}

		// Execute job and grab exit code
		code = binningJob.waitForCompletion(true) ? 0 : 1;
	}

	// Clean up the intermediate output
	FileSystem.get(conf).delete(outputDirIntermediate, true);

	System.exit(code);
}