Java Code Examples for org.apache.hadoop.mapreduce.Job

The following are examples showing how to use org.apache.hadoop.mapreduce.Job. They are extracted from open source projects; the project and source file for each example are noted above it.
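
Most of the examples below follow the same driver pattern: obtain a Job (newer code uses the Job.getInstance(Configuration) factory rather than the deprecated Job constructors), set the jar, mapper, reducer and key/value classes, wire up the input and output paths, and block on waitForCompletion. The sketch below is a template for that pattern rather than code from any of the listed projects; WordCountMapper and WordCountReducer are placeholders for your own classes.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Preferred over the deprecated new Job(...) constructors.
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCountDriver.class);

        // Placeholder mapper/combiner/reducer classes.
        job.setMapperClass(WordCountMapper.class);
        job.setCombinerClass(WordCountReducer.class);
        job.setReducerClass(WordCountReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Block until the job finishes and turn success/failure into the exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
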
Example 1
Project: learn-to-hadoop   File: MaxTemperatureWithCombiner.java
public static void main(String[] args) throws Exception {
    if(args.length != 2){
        System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
        System.exit(-1);
    }

    Job job = new Job();
    job.setJarByClass(MaxTemperatureWithCombiner.class);
    job.setJobName("Max Temperature With Combiner");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example 2
Project: Wikipedia-Index   File: TF_IDF.java
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	
	Job job = Job.getInstance(conf);
	job.setJobName("TF-IDFCount");
	job.setJarByClass(TF_IDF.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(TextArrayWritable.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);
	
	job.setMapperClass(TF_IDFMap.class);
	job.setReducerClass(TF_IDFReduce.class);
	
	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileInputFormat.addInputPath(job, new Path(args[1]));
	FileOutputFormat.setOutputPath(job, new Path(args[2]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Example 3
Project: hadoop   File: MapReduceTestUtil.java
/**
 * Creates a simple copy job.
 * 
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, 
    Path... indirs) throws Exception {
  conf.setInt(MRJobConfig.NUM_MAPS, 3);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(DataCopyMapper.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopyReducer.class);
  theJob.setNumReduceTasks(1);
  return theJob;
}
 
Example 4
Project: Hadoop-Codes   File: MaxTempDriver.java
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	Job job = Job.getInstance(conf, "maxtemp");
	
	job.setMapperClass(MaxTempMapper.class);
	job.setReducerClass(MaxTempReducer.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(FloatWritable.class);

	FileInputFormat.setInputPaths(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	if (!job.waitForCompletion(true))
		return;
}
 
Example 5
Project: aliyun-maxcompute-data-collectors   File: TestImportJob.java
@Override
public void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {

  // Write a line of text into a file so that we can get
  // a record to the map task.
  Path dir = new Path(this.options.getTempDir());
  Path p = new Path(dir, "sqoop-dummy-import-job-file.txt");
  FileSystem fs = FileSystem.getLocal(this.options.getConf());
  if (fs.exists(p)) {
    boolean result = fs.delete(p, false);
    assertTrue("Couldn't delete temp file!", result);
  }

  BufferedWriter w = new BufferedWriter(
      new OutputStreamWriter(fs.create(p)));
  w.append("This is a line!");
  w.close();

  FileInputFormat.addInputPath(job, p);

  // And set the InputFormat itself.
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}
 
Example 6
Project: hadoop   File: LoadJob.java
public Job call() throws IOException, InterruptedException,
                         ClassNotFoundException {
  ugi.doAs(
    new PrivilegedExceptionAction<Job>() {
      public Job run() throws IOException, ClassNotFoundException,
                              InterruptedException {
        job.setMapperClass(LoadMapper.class);
        job.setReducerClass(LoadReducer.class);
        job.setNumReduceTasks(jobdesc.getNumberReduces());
        job.setMapOutputKeyClass(GridmixKey.class);
        job.setMapOutputValueClass(GridmixRecord.class);
        job.setSortComparatorClass(LoadSortComparator.class);
        job.setGroupingComparatorClass(SpecGroupingComparator.class);
        job.setInputFormatClass(LoadInputFormat.class);
        job.setOutputFormatClass(RawBytesOutputFormat.class);
        job.setPartitionerClass(DraftPartitioner.class);
        job.setJarByClass(LoadJob.class);
        job.getConfiguration().setBoolean(Job.USED_GENERIC_PARSER, true);
        FileOutputFormat.setOutputPath(job, outdir);
        job.submit();
        return job;
      }
    });

  return job;
}
 
Example 7
Project: hadoop   File: TestChainErrors.java
/**
 * Tests one of the maps consuming output.
 * 
 * @throws Exception
 */
public void testChainMapNoOuptut() throws Exception {
  Configuration conf = createJobConf();
  String expectedOutput = "";

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0, input);
  job.setJobName("chain");

  ChainMapper.addMapper(job, ConsumeMap.class, IntWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  job.waitForCompletion(true);
  assertTrue("Job failed", job.isSuccessful());
  assertEquals("Outputs doesn't match", expectedOutput, MapReduceTestUtil
      .readOutput(outDir, conf));
}
 
Example 8
Project: ditb   File: IntegrationTestTableMapReduceUtil.java
/**
 * Look for jars we expect to be on the classpath by name.
 */
@Test
public void testAddDependencyJars() throws Exception {
  Job job = new Job();
  TableMapReduceUtil.addDependencyJars(job);
  String tmpjars = job.getConfiguration().get("tmpjars");

  // verify presence of modules
  assertTrue(tmpjars.contains("hbase-common"));
  assertTrue(tmpjars.contains("hbase-protocol"));
  assertTrue(tmpjars.contains("hbase-client"));
  assertTrue(tmpjars.contains("hbase-hadoop-compat"));
  assertTrue(tmpjars.contains("hbase-server"));

  // verify presence of 3rd party dependencies.
  assertTrue(tmpjars.contains("zookeeper"));
  assertTrue(tmpjars.contains("netty"));
  assertTrue(tmpjars.contains("protobuf"));
  assertTrue(tmpjars.contains("guava"));
  assertTrue(tmpjars.contains("htrace"));
}
 
Example 9
Project: ditb   File: CellCounter.java
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("ERROR: Wrong number of parameters: " + args.length);
    System.err.println("Usage: CellCounter ");
    System.err.println("       <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +
      "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
    System.err.println("  Note: -D properties will be applied to the conf used. ");
    System.err.println("  Additionally, the following SCAN properties can be specified");
    System.err.println("  to get fine grained control on what is counted..");
    System.err.println("   -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
    System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +
        "string : used to separate the rowId/column family name and qualifier name.");
    System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +
        "operation to a limited subset of rows from the table based on regex or prefix pattern.");
    System.exit(-1);
  }
  Job job = createSubmittableJob(conf, otherArgs);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example 10
Project: Hadoop-Codes   File: testDriver.java
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	Job job = Job.getInstance(conf, "test");
	
	job.setMapperClass(testMapper.class);
	job.setPartitionerClass(testPartitioner.class);
	job.setReducerClass(testReducer.class);
	job.setNumReduceTasks(10);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(IntWritable.class);
	
	FileInputFormat.setInputPaths(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	if (!job.waitForCompletion(true))
		return;
}
 
Example 11
Project: aliyun-maxcompute-data-collectors   File: MergeJob.java
private void configueAvroMergeJob(Configuration conf, Job job, Path oldPath, Path newPath)
    throws IOException {
  LOG.info("Trying to merge avro files");
  final Schema oldPathSchema = AvroUtil.getAvroSchema(oldPath, conf);
  final Schema newPathSchema = AvroUtil.getAvroSchema(newPath, conf);
  if (oldPathSchema == null || newPathSchema == null || !oldPathSchema.equals(newPathSchema)) {
    throw new IOException("Invalid schema for input directories. Schema for old data: ["
        + oldPathSchema + "]. Schema for new data: [" + newPathSchema + "]");
  }
  LOG.debug("Avro Schema:" + oldPathSchema);
  job.setInputFormatClass(AvroInputFormat.class);
  job.setOutputFormatClass(AvroOutputFormat.class);
  job.setMapperClass(MergeAvroMapper.class);
  job.setReducerClass(MergeAvroReducer.class);
  AvroJob.setOutputSchema(job.getConfiguration(), oldPathSchema);
}
 
Example 12
Project: ditb   File: SampleUploader.java
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
  Path inputPath = new Path(args[0]);
  String tableName = args[1];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(Uploader.class);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(Uploader.class);
  // No reducers.  Just write straight to table.  Call initTableReducerJob
  // because it sets up the TableOutputFormat.
  TableMapReduceUtil.initTableReducerJob(tableName, null, job);
  job.setNumReduceTasks(0);
  return job;
}
 
Example 13
Project: ditb   File: TestTableInputFormat.java
void testInputFormat(Class<? extends InputFormat> clazz)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
  job.setInputFormatClass(clazz);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  assertTrue("job failed!", job.waitForCompletion(true));
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
  assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
  assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
  assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
}
 
Example 14
Project: ditb   File: TestHFileOutputFormat2.java
private void runIncrementalPELoad(Configuration conf, HTableDescriptor tableDescriptor,
    RegionLocator regionLocator, Path outDir) throws IOException, UnsupportedEncodingException,
    InterruptedException, ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());
  setupRandomGeneratorMapper(job);
  HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  FileOutputFormat.setOutputPath(job, outDir);

  assertFalse(util.getTestFileSystem().exists(outDir)) ;

  assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());

  assertTrue(job.waitForCompletion(true));
}
 
Example 15
Project: ditb   File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
      InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
 
Example 16
Project: aliyun-maxcompute-data-collectors   File: ExportJobBase.java
@Override
protected void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {

  if (options.getOdpsTable() != null) {
    Configuration conf = job.getConfiguration();
    setInputFormatClass(OdpsExportInputFormat.class);
    conf.set(OdpsConstants.TABLE_NAME, options.getOdpsTable());
    conf.set(OdpsConstants.ACCESS_ID, options.getOdpsAccessID());
    conf.set(OdpsConstants.ACCESS_KEY, options.getOdpsAccessKey());
    conf.set(OdpsConstants.ENDPOINT, options.getOdpsEndPoint());
    conf.set(OdpsConstants.PROJECT, options.getOdpsProject());
    String partitionSpec = options.getOdpsPartitionSpec();
    if (partitionSpec != null) {
      conf.set(OdpsConstants.PARTITION_SPEC, partitionSpec);
    }
    setMapperClass(OdpsExportMapper.class);
  }
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
  if (!isHCatJob && options.getOdpsTable() == null) {
    FileInputFormat.addInputPath(job, getInputPath());
  }

}
 
Example 17
Project: hadoop   File: TestCLI.java
@Test
public void testListAttemptIdsWithInvalidInputs() throws Exception {
  JobID jobId = JobID.forName(jobIdStr);
  Cluster mockCluster = mock(Cluster.class);
  Job job = mock(Job.class);
  CLI cli = spy(new CLI());

  doReturn(mockCluster).when(cli).createCluster();
  when(mockCluster.getJob(jobId)).thenReturn(job);

  int retCode_JOB_SETUP = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "JOB_SETUP", "running" });

  int retCode_JOB_CLEANUP = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "JOB_CLEANUP", "running" });

  int retCode_invalidTaskState = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "REDUCE", "complete" });

  assertEquals("JOB_SETUP is an invalid input,exit code should be -1", -1,
      retCode_JOB_SETUP);
  assertEquals("JOB_CLEANUP is an invalid input,exit code should be -1", -1,
      retCode_JOB_CLEANUP);
  assertEquals("complete is an invalid input,exit code should be -1", -1,
      retCode_invalidTaskState);

}
 
Example 18
Project: hadoop   File: TestJobCounters.java
@Test
public void testNewCounterC() throws Exception {
  final Job job = createJob();
  final Configuration conf = job.getConfiguration();
  conf.setInt(JobContext.IO_SORT_FACTOR, 3);
  createWordsFile(inFiles[3], conf);
  createWordsFile(inFiles[4], conf);
  long inputSize = 0;
  inputSize += getFileSize(inFiles[0]);
  inputSize += getFileSize(inFiles[1]);
  inputSize += getFileSize(inFiles[2]);
  inputSize += getFileSize(inFiles[3]);
  inputSize += getFileSize(inFiles[4]);
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(
      job, IN_DIR);
  org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(
      job, new Path(OUT_DIR, "outputN2"));
  assertTrue(job.waitForCompletion(true));
  final Counters c1 = Counters.downgrade(job.getCounters());
  validateCounters(c1, 122880, 25600, 102400);
  validateFileCounters(c1, inputSize, 0, 0, 0);
}
 
Example 19
Project: hadoop   File: DataDrivenDBInputFormat.java
/** Note that the "orderBy" column is called the "splitBy" in this version.
  * We reuse the same field, but it's not strictly ordering it -- just partitioning
  * the results.
  */
public static void setInput(Job job, 
    Class<? extends DBWritable> inputClass,
    String tableName,String conditions, 
    String splitBy, String... fieldNames) {
  DBInputFormat.setInput(job, inputClass, tableName, conditions, splitBy, fieldNames);
  job.setInputFormatClass(DataDrivenDBInputFormat.class);
}
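
A driver normally calls setInput after registering the JDBC connection on the job; note from the method above that setInput also installs DataDrivenDBInputFormat as the job's input format. The snippet below is a hedged usage sketch, assuming the usual classes from org.apache.hadoop.mapreduce.lib.db, a hypothetical MyRecord class implementing DBWritable, and placeholder connection settings.

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "db-import");

// Register the JDBC driver, connection string and credentials on the job
// (driver class and URL are placeholders).
DBConfiguration.configureDB(job.getConfiguration(),
    "com.mysql.jdbc.Driver", "jdbc:mysql://localhost/mydb", "user", "password");

// Splits are generated over the "id" column; it partitions rather than orders the results.
// MyRecord is an assumed DBWritable implementation.
DataDrivenDBInputFormat.setInput(job, MyRecord.class,
    "employees", null /* conditions */, "id", "id", "name", "salary");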
 
Example 20
Project: hadoop   File: ValueAggregatorJob.java
public static JobControl createValueAggregatorJobs(String args[],
  Class<? extends ValueAggregatorDescriptor>[] descriptors) 
throws IOException {
  
  JobControl theControl = new JobControl("ValueAggregatorJobs");
  ArrayList<ControlledJob> dependingJobs = new ArrayList<ControlledJob>();
  Configuration conf = new Configuration();
  if (descriptors != null) {
    conf = setAggregatorDescriptors(descriptors);
  }
  Job job = createValueAggregatorJob(conf, args);
  ControlledJob cjob = new ControlledJob(job, dependingJobs);
  theControl.addJob(cjob);
  return theControl;
}
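
The returned JobControl does not execute anything by itself; the caller is expected to drive it, typically from its own thread since JobControl implements Runnable. A minimal sketch of that driving loop, assuming the enclosing method declares throws Exception and using an arbitrary one-second polling interval:

JobControl control = createValueAggregatorJobs(args, descriptors);

// JobControl implements Runnable, so it can be driven from a separate thread.
Thread controlThread = new Thread(control);
controlThread.setDaemon(true);
controlThread.start();

// Poll until every controlled job has either succeeded or failed.
while (!control.allFinished()) {
  Thread.sleep(1000);
}
System.out.println("Failed jobs: " + control.getFailedJobList().size());
control.stop();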
 
Example 21
Project: hadoop   File: SecondarySort.java
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: secondarysort <in> <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "secondary sort");
  job.setJarByClass(SecondarySort.class);
  job.setMapperClass(MapClass.class);
  job.setReducerClass(Reduce.class);

  // group and partition by the first int in the pair
  job.setPartitionerClass(FirstPartitioner.class);
  job.setGroupingComparatorClass(FirstGroupingComparator.class);

  // the map output is IntPair, IntWritable
  job.setMapOutputKeyClass(IntPair.class);
  job.setMapOutputValueClass(IntWritable.class);

  // the reduce output is Text, IntWritable
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example 22
Project: DocIT   File: Total.java
public static void total(String name, String in, String out)
		throws IOException, InterruptedException, ClassNotFoundException {
	Configuration conf = new Configuration();
	conf.set(QUERIED_NAME, name);
	Job job = Job.getInstance(new Cluster(conf), conf);
	job.setJarByClass(Total.class);

	// in
	if (!in.endsWith("/"))
		in = in.concat("/");
	in = in.concat("employees");
	SequenceFileInputFormat.addInputPath(job, new Path(in));
	job.setInputFormatClass(SequenceFileInputFormat.class);

	// map
	job.setMapperClass(TotalMapper.class);
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(DoubleWritable.class);

	// reduce
	job.setCombinerClass(TotalReducer.class);
	job.setReducerClass(TotalReducer.class);

	// out
	SequenceFileOutputFormat.setOutputPath(job, new Path(out));
	job.setOutputFormatClass(SequenceFileOutputFormat.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);

	job.waitForCompletion(true);
}
 
Example 23
Project: LDA   File: InitDriver.java
public static void run(Configuration conf, Path[] inputPath, Path outputPath) throws IOException, ClassNotFoundException, InterruptedException {
        String jobName = "init matrix";
        Job job = new Job(conf, jobName);

        job.setMapOutputKeyClass(twoDimensionIndexWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(twoDimensionIndexWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setMapperClass(InitMapper.class);
        job.setReducerClass(InitReducer.class);
        job.setNumReduceTasks(1);

        for(Path path : inputPath) {
            FileInputFormat.addInputPath(job, path);
        }
        Path output = new Path(outputPath, "initDir");
        FileOutputFormat.setOutputPath(job, output);

        job.setJarByClass(LDADriver.class);
        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Init failed");
        }
}
 
Example 24
Project: ditb   File: WALPlayer.java
@Override
public int run(String[] args) throws Exception {
  String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
  if (otherArgs.length < 2) {
    usage("Wrong number of arguments: " + otherArgs.length);
    System.exit(-1);
  }
  Job job = createSubmittableJob(otherArgs);
  return job.waitForCompletion(true) ? 0 : 1;
}
 
Example 25
Project: ditb   File: TestTableInputFormatScanBase.java
/**
 * Tests a MR scan using specific start and stop rows.
 *
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
protected void testScan(String start, String stop, String last)
throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
    "To" + (stop != null ? stop.toUpperCase() : "Empty");
  LOG.info("Before map/reduce startup - job " + jobName);
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILY);
  if (start != null) {
    scan.setStartRow(Bytes.toBytes(start));
  }
  c.set(KEY_STARTROW, start != null ? start : "");
  if (stop != null) {
    scan.setStopRow(Bytes.toBytes(stop));
  }
  c.set(KEY_LASTROW, last != null ? last : "");
  LOG.info("scan before: " + scan);
  Job job = new Job(c, jobName);
  TableMapReduceUtil.initTableMapperJob(
    Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
    ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  job.setReducerClass(ScanReducer.class);
  job.setNumReduceTasks(1); // one to get final "first" and "last" key
  FileOutputFormat.setOutputPath(job,
      new Path(TEST_UTIL.getDataTestDir(), job.getJobName()));
  LOG.info("Started " + job.getJobName());
  assertTrue(job.waitForCompletion(true));
  LOG.info("After map/reduce completion - job " + jobName);
}
 
Example 26
Project: ditb   File: TableMapReduceUtil.java
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table  The table name to read from.
 * @param scan  The scan instance with the columns, time range etc.
 * @param mapper  The mapper class to use.
 * @param outputKeyClass  The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job  The current job to adjust.  Make sure the passed job is
 * carrying all necessary HBase configuration.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(TableName table,
    Scan scan,
    Class<? extends TableMapper> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass,
    Job job) throws IOException {
  initTableMapperJob(table.getNameAsString(),
      scan,
      mapper,
      outputKeyClass,
      outputValueClass,
      job,
      true);
}
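
From the caller's side this helper is usually the only HBase-specific step in an otherwise ordinary job setup (Example 25 above shows the String-table overload in use). The snippet below is a hedged sketch of a map-only driver using this TableName overload; the table name, mapper class, and caching values are placeholders.

Configuration conf = HBaseConfiguration.create();
Job job = Job.getInstance(conf, "scan-table");

Scan scan = new Scan();
scan.setCaching(500);        // larger scanner caching suits MR scans (value is arbitrary)
scan.setCacheBlocks(false);  // avoid polluting the region server block cache

// Wires up the table input format, serializes the scan into the config,
// and sets the mapper plus its output types. MyTableMapper is a placeholder.
TableMapReduceUtil.initTableMapperJob(TableName.valueOf("mytable"), scan,
    MyTableMapper.class, Text.class, IntWritable.class, job);

job.setNumReduceTasks(0);
job.setOutputFormatClass(NullOutputFormat.class);
job.waitForCompletion(true);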
 
Example 27
Project: big_data   File: ActiveUserRunner.java
/**
 * Initializes the list of scans.
 * 
 * @param job the job whose configuration carries the run date
 * @return a single-element list of configured scans
 */
private List<Scan> initScans(Job job) {
	Configuration conf = job.getConfiguration();
	// Get the run date: yyyy-MM-dd
	String date = conf.get(GlobalConstants.RUNNING_DATE_PARAMES);
	long startDate = TimeUtil.parseString2Long(date);
	long endDate = startDate + GlobalConstants.DAY_OF_MILLISECONDS;

	Scan scan = new Scan();
	// Define the start and stop rowkeys for the HBase scan
	scan.setStartRow(Bytes.toBytes("" + startDate));
	scan.setStopRow(Bytes.toBytes("" + endDate));

	FilterList filterList = new FilterList();
	// Define the column names the mapper needs to read
	String[] columns = new String[] { EventLogConstants.LOG_COLUMN_NAME_UUID, // user id
			EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME, // server time
			EventLogConstants.LOG_COLUMN_NAME_PLATFORM, // platform name
			EventLogConstants.LOG_COLUMN_NAME_BROWSER_NAME, // browser name
			EventLogConstants.LOG_COLUMN_NAME_BROWSER_VERSION // browser version
	};
	filterList.addFilter(this.getColumnFilter(columns));

	scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(EventLogConstants.HBASE_NAME_EVENT_LOGS));
	scan.setFilter(filterList);
	return Lists.newArrayList(scan);
}
 
Example 28
Project: hadoop   File: TestBinaryTokenFile.java
private void setupBinaryTokenFile(Job job) {
// Credentials in the job will not have delegation tokens
// because security is disabled. Fetch delegation tokens
// and store in binary token file.
  createBinaryTokenFile(job.getConfiguration());
  job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY,
      binaryTokenFileName.toString());
  // NB: the MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY
  // key now gets deleted from config,
  // so it's not accessible in the job's config. So,
  // we use another key to pass the file name into the job configuration:
  job.getConfiguration().set(KEY_SECURITY_TOKEN_FILE_NAME,
      binaryTokenFileName.toString());
}
 
Example 29
Project: hadoop   File: TestCLI.java
@Test
public void testListAttemptIdsWithValidInput() throws Exception {
  JobID jobId = JobID.forName(jobIdStr);
  Cluster mockCluster = mock(Cluster.class);
  Job job = mock(Job.class);
  CLI cli = spy(new CLI());

  doReturn(mockCluster).when(cli).createCluster();
  when(job.getTaskReports(TaskType.MAP)).thenReturn(
      getTaskReports(jobId, TaskType.MAP));
  when(job.getTaskReports(TaskType.REDUCE)).thenReturn(
      getTaskReports(jobId, TaskType.REDUCE));
  when(mockCluster.getJob(jobId)).thenReturn(job);

  int retCode_MAP = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "MAP", "running" });
  // testing case insensitive behavior
  int retCode_map = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "map", "running" });

  int retCode_REDUCE = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "REDUCE", "running" });

  int retCode_completed = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "REDUCE", "completed" });

  assertEquals("MAP is a valid input,exit code should be 0", 0, retCode_MAP);
  assertEquals("map is a valid input,exit code should be 0", 0, retCode_map);
  assertEquals("REDUCE is a valid input,exit code should be 0", 0,
      retCode_REDUCE);
  assertEquals(
      "REDUCE and completed are a valid inputs to -list-attempt-ids,exit code should be 0",
      0, retCode_completed);

  verify(job, times(2)).getTaskReports(TaskType.MAP);
  verify(job, times(2)).getTaskReports(TaskType.REDUCE);
}
 
Example 30
Project: hadoop   File: MultiFileWordCount.java
public int run(String[] args) throws Exception {

    if(args.length < 2) {
      printUsage();
      return 2;
    }

    Job job = Job.getInstance(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);

    //set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);
    
    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);

    //use the defined mapper
    job.setMapperClass(MapClass.class);
    //use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
  }
 
Example 31
Project: ditb   File: TestHFileOutputFormat2.java
/**
 * Run small MR job.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
public void testWritingPEData() throws Exception {
  Configuration conf = util.getConfiguration();
  Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
  FileSystem fs = testDir.getFileSystem(conf);

  // Set down this value or we OOME in eclipse.
  conf.setInt("mapreduce.task.io.sort.mb", 20);
  // Write a few files.
  conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

  Job job = new Job(conf, "testWritingPEData");
  setupRandomGeneratorMapper(job);
  // This partitioner doesn't work well for number keys but using it anyways
  // just to demonstrate how to configure it.
  byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
  byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

  Arrays.fill(startKey, (byte)0);
  Arrays.fill(endKey, (byte)0xff);

  job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
  // Set start and end rows for partitioner.
  SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
  SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
  job.setReducerClass(KeyValueSortReducer.class);
  job.setOutputFormatClass(HFileOutputFormat2.class);
  job.setNumReduceTasks(4);
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());

  FileOutputFormat.setOutputPath(job, testDir);
  assertTrue(job.waitForCompletion(false));
  FileStatus [] files = fs.listStatus(testDir);
  assertTrue(files.length > 0);
}
 
Example 32
Project: hadoop   File: DistCp.java
/**
 * Create and submit the mapreduce job.
 * @return The mapreduce job object that has been submitted
 */
public Job createAndSubmitJob() throws Exception {
  assert inputOptions != null;
  assert getConf() != null;
  Job job = null;
  try {
    synchronized(this) {
      //Don't cleanup while we are setting up.
      metaFolder = createMetaFolderPath();
      jobFS = metaFolder.getFileSystem(getConf());
      job = createJob();
    }
    if (inputOptions.shouldUseDiff()) {
      if (!DistCpSync.sync(inputOptions, getConf())) {
        inputOptions.disableUsingDiff();
      }
    }
    createInputFileListing(job);

    job.submit();
    submitted = true;
  } finally {
    if (!submitted) {
      cleanup();
    }
  }

  String jobID = job.getJobID().toString();
  job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID);
  LOG.info("DistCp job-id: " + jobID);

  return job;
}
 
Example 33
Project: hadoop   File: FailJob.java
public int run(String[] args) throws Exception {
  if(args.length < 1) {
    System.err.println("FailJob " +
        " (-failMappers|-failReducers)");
    ToolRunner.printGenericCommandUsage(System.err);
    return 2;
  }
  boolean failMappers = false, failReducers = false;

  for (int i = 0; i < args.length; i++ ) {
    if (args[i].equals("-failMappers")) {
      failMappers = true;
    }
    else if(args[i].equals("-failReducers")) {
      failReducers = true;
    }
  }
  if (!(failMappers ^ failReducers)) {
    System.err.println("Exactly one of -failMappers or -failReducers must be specified.");
    return 3;
  }

  // Write a file with one line per mapper.
  final FileSystem fs = FileSystem.get(getConf());
  Path inputDir = new Path(FailJob.class.getSimpleName() + "_in");
  fs.mkdirs(inputDir);
  for (int i = 0; i < getConf().getInt("mapred.map.tasks", 1); ++i) {
    BufferedWriter w = new BufferedWriter(new OutputStreamWriter(
        fs.create(new Path(inputDir, Integer.toString(i)))));
    w.write(Integer.toString(i) + "\n");
    w.close();
  }

  Job job = createJob(failMappers, failReducers, inputDir);
  return job.waitForCompletion(true) ? 0 : 1;
}
 
Example 34
Project: ditb   File: TableMapReduceUtil.java
/**
 * Use this before submitting a Multi TableMap job. It will appropriately set
 * up the job.
 *
 * @param scans The list of {@link Scan} objects to read from.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is carrying
 *          all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the
 *          configured job classes via the distributed cache (tmpjars).
 * @param initCredentials whether to initialize hbase auth credentials for the job
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(List<Scan> scans,
    Class<? extends TableMapper> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job,
    boolean addDependencyJars,
    boolean initCredentials) throws IOException {
  job.setInputFormatClass(MultiTableInputFormat.class);
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (outputKeyClass != null) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  job.setMapperClass(mapper);
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  List<String> scanStrings = new ArrayList<String>();

  for (Scan scan : scans) {
    scanStrings.add(convertScanToString(scan));
  }
  job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
    scanStrings.toArray(new String[scanStrings.size()]));

  if (addDependencyJars) {
    addDependencyJars(job);
  }

  if (initCredentials) {
    initCredentials(job);
  }
}
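
Each Scan in the list is expected to name the table it targets through the Scan.SCAN_ATTRIBUTES_TABLE_NAME attribute, exactly as Example 27 above does. The snippet below is a hedged sketch of calling this overload, with placeholder table names and a hypothetical MyMultiTableMapper class.

List<Scan> scans = new ArrayList<Scan>();
for (String table : new String[] { "events_2016", "events_2017" }) {
  Scan scan = new Scan();
  // Tag the scan with the table it should read from.
  scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(table));
  scans.add(scan);
}

Job job = Job.getInstance(HBaseConfiguration.create(), "multi-table-scan");
// addDependencyJars = true, initCredentials = true
TableMapReduceUtil.initTableMapperJob(scans, MyMultiTableMapper.class,
    Text.class, IntWritable.class, job, true, true);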
 
Example 35
Project: ditb   File: IntegrationTestLoadAndVerify.java
@Override
public Job createSubmittableJob(String[] args) throws IOException {
  Job job = super.createSubmittableJob(args);
  // Call my class instead.
  job.setJarByClass(WALMapperSearcher.class);
  job.setMapperClass(WALMapperSearcher.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  return job;
}
 
Example 36
Project: mapreduce-samples   File: UnitSum.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitSum.class);
        job.setMapperClass(PassMapper.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    }
 
Example 37
Project: ditb   File: TestTableMapReduceUtil.java
@Test
public void testInitTableMapperJob1() throws Exception {
  Configuration configuration = new Configuration();
  Job job = new Job(configuration, "tableName");
  // test 
  TableMapReduceUtil.initTableMapperJob("Table", new Scan(), Import.Importer.class, Text.class,
      Text.class, job, false, WALInputFormat.class);
  assertEquals(WALInputFormat.class, job.getInputFormatClass());
  assertEquals(Import.Importer.class, job.getMapperClass());
  assertEquals(LongWritable.class, job.getOutputKeyClass());
  assertEquals(Text.class, job.getOutputValueClass());
  assertNull(job.getCombinerClass());
  assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
}
 
Example 38
Project: aliyun-maxcompute-data-collectors   File: HdfsOdpsImportJob.java
private DatasetDescriptor getDatasetDescriptorFromParquetFile(Job job, FileSystem fs, String uri)
    throws IOException {

  ArrayList<FileStatus> files = new ArrayList<FileStatus>();
  FileStatus[] dirs;
  dirs = fs.globStatus(fs.makeQualified(getInputPath()));
  for (int i = 0; (dirs != null && i < dirs.length); i++) {
    files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath(), HIDDEN_FILES_PATH_FILTER)));
    // We only check one file, so exit the loop when we have at least
    // one.
    if (files.size() > 0) {
      break;
    }
  }

  ParquetMetadata parquetMetadata;
  try {
    parquetMetadata =
        ParquetFileReader.readFooter(job.getConfiguration(),
            fs.makeQualified(files.get(0).getPath()));
  } catch (IOException e) {
    LOG.error("Wrong file format. Please check the export file's format.", e);
    throw e;
  }
  MessageType schema = parquetMetadata.getFileMetaData().getSchema();
  Schema avroSchema = new AvroSchemaConverter().convert(schema);
  DatasetDescriptor descriptor =
      new DatasetDescriptor.Builder().schema(avroSchema).format(Formats.PARQUET)
          .compressionType(ParquetJob.getCompressionType(job.getConfiguration())).build();
  return descriptor;
}
 
Example 39
Project: ditb   File: MapreduceTestingShim.java
@Override
public Job newJob(Configuration conf) throws IOException {
  // Implementing:
  // return new Job(conf);
  Constructor<Job> c;
  try {
    c = Job.class.getConstructor(Configuration.class);
    return c.newInstance(conf);
  } catch (Exception e) {
    throw new IllegalStateException(
        "Failed to instantiate new Job(conf)", e);
  }
}
 
Example 40
Project: mumu-mapreduce   File: MaxTemperatureMapReduce.java
public static void main(String[] args) {
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }

    String temperatureInput = args[0];
    String temperatureOutput = args[1];
    try {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(MaxTemperatureMapReduce.class);
        job.setJobName("MaxTemperature");
        FileInputFormat.addInputPath(job, new Path(temperatureInput));
        FileOutputFormat.setOutputPath(job, new Path(temperatureOutput));

        job.setMapperClass(MaxTemperatureMapper.class);
        job.setReducerClass(MaxTemperatureReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        e.printStackTrace();
    }
}