Java Code Examples for org.apache.hadoop.mapreduce.Job

The following examples show how to use org.apache.hadoop.mapreduce.Job. They are extracted from open source projects.
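Most of the examples below follow the same basic driver pattern. As a quick orientation, here is a minimal, hedged sketch of that pattern (the class and job names are placeholders; the stock identity Mapper and Reducer are used so the sketch needs no custom classes):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class PassThroughDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(conf, name) is the non-deprecated factory for creating a Job.
        Job job = Job.getInstance(conf, "pass-through");
        job.setJarByClass(PassThroughDriver.class);
        // The base Mapper and Reducer classes behave as identity functions,
        // so this skeleton runs without any user-defined classes.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        // With the default TextInputFormat, the identity mapper emits
        // LongWritable byte offsets as keys and Text lines as values.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit the job and block until it finishes; exit non-zero on failure.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The examples that follow vary this skeleton with different input/output formats, combiners, partitioners, HBase table mappers, and test harnesses.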
Example 1
Project: learn-to-hadoop   File: MaxTemperatureWithCombiner.java
public static void main(String[] args) throws Exception {
    if(args.length != 2){
        System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
        System.exit(-1);
    }

    Job job = new Job();
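    // Note: this no-argument Job constructor is deprecated in Hadoop 2.x;
    // Job.getInstance(conf), as used in Example 2, is the preferred replacement.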
    job.setJarByClass(MaxTemperatureWithCombiner.class);
    job.setJobName("Max Temperature With Combiner");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example 2
Project: Wikipedia-Index   File: TF_IDF.java
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	
	Job job = Job.getInstance(conf);
	job.setJobName("TF-IDFCount");
	job.setJarByClass(TF_IDF.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(TextArrayWritable.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);
	
	job.setMapperClass(TF_IDFMap.class);
	job.setReducerClass(TF_IDFReduce.class);
	
	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileInputFormat.addInputPath(job, new Path(args[1]));
	FileOutputFormat.setOutputPath(job, new Path(args[2]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Example 3
Project: hadoop   File: MapReduceTestUtil.java
/**
 * Creates a simple copy job.
 * 
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, 
    Path... indirs) throws Exception {
  conf.setInt(MRJobConfig.NUM_MAPS, 3);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(DataCopyMapper.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopyReducer.class);
  theJob.setNumReduceTasks(1);
  return theJob;
}
 
Example 4
Project: Hadoop-Codes   File: MaxTempDriver.java
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	Job job = Job.getInstance(conf, "maxtemp");
	
	job.setMapperClass(MaxTempMapper.class);
	job.setReducerClass(MaxTempReducer.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(FloatWritable.class);

	FileInputFormat.setInputPaths(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	if (!job.waitForCompletion(true))
		return;
}
 
Example 5
Project: aliyun-maxcompute-data-collectors   File: TestImportJob.java
@Override
public void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {

  // Write a line of text into a file so that we can get
  // a record to the map task.
  Path dir = new Path(this.options.getTempDir());
  Path p = new Path(dir, "sqoop-dummy-import-job-file.txt");
  FileSystem fs = FileSystem.getLocal(this.options.getConf());
  if (fs.exists(p)) {
    boolean result = fs.delete(p, false);
    assertTrue("Couldn't delete temp file!", result);
  }

  BufferedWriter w = new BufferedWriter(
      new OutputStreamWriter(fs.create(p)));
  w.append("This is a line!");
  w.close();

  FileInputFormat.addInputPath(job, p);

  // And set the InputFormat itself.
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}
 
Example 6
Project: hadoop   File: LoadJob.java
public Job call() throws IOException, InterruptedException,
                         ClassNotFoundException {
  ugi.doAs(
    new PrivilegedExceptionAction<Job>() {
      public Job run() throws IOException, ClassNotFoundException,
                              InterruptedException {
        job.setMapperClass(LoadMapper.class);
        job.setReducerClass(LoadReducer.class);
        job.setNumReduceTasks(jobdesc.getNumberReduces());
        job.setMapOutputKeyClass(GridmixKey.class);
        job.setMapOutputValueClass(GridmixRecord.class);
        job.setSortComparatorClass(LoadSortComparator.class);
        job.setGroupingComparatorClass(SpecGroupingComparator.class);
        job.setInputFormatClass(LoadInputFormat.class);
        job.setOutputFormatClass(RawBytesOutputFormat.class);
        job.setPartitionerClass(DraftPartitioner.class);
        job.setJarByClass(LoadJob.class);
        job.getConfiguration().setBoolean(Job.USED_GENERIC_PARSER, true);
        FileOutputFormat.setOutputPath(job, outdir);
        job.submit();
        return job;
      }
    });

  return job;
}
 
Example 7
Project: hadoop   File: TestChainErrors.java
/**
 * Tests one of the maps consuming output.
 * 
 * @throws Exception
 */
public void testChainMapNoOuptut() throws Exception {
  Configuration conf = createJobConf();
  String expectedOutput = "";

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0, input);
  job.setJobName("chain");

  ChainMapper.addMapper(job, ConsumeMap.class, IntWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  job.waitForCompletion(true);
  assertTrue("Job failed", job.isSuccessful());
  assertEquals("Outputs doesn't match", expectedOutput, MapReduceTestUtil
      .readOutput(outDir, conf));
}
 
Example 8
Project: ditb   File: IntegrationTestTableMapReduceUtil.java
/**
 * Look for jars we expect to be on the classpath by name.
 */
@Test
public void testAddDependencyJars() throws Exception {
  Job job = new Job();
  TableMapReduceUtil.addDependencyJars(job);
  String tmpjars = job.getConfiguration().get("tmpjars");

  // verify presence of modules
  assertTrue(tmpjars.contains("hbase-common"));
  assertTrue(tmpjars.contains("hbase-protocol"));
  assertTrue(tmpjars.contains("hbase-client"));
  assertTrue(tmpjars.contains("hbase-hadoop-compat"));
  assertTrue(tmpjars.contains("hbase-server"));

  // verify presence of 3rd party dependencies.
  assertTrue(tmpjars.contains("zookeeper"));
  assertTrue(tmpjars.contains("netty"));
  assertTrue(tmpjars.contains("protobuf"));
  assertTrue(tmpjars.contains("guava"));
  assertTrue(tmpjars.contains("htrace"));
}
 
Example 9
Project: ditb   File: CellCounter.java
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("ERROR: Wrong number of parameters: " + args.length);
    System.err.println("Usage: CellCounter ");
    System.err.println("       <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +
      "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
    System.err.println("  Note: -D properties will be applied to the conf used. ");
    System.err.println("  Additionally, the following SCAN properties can be specified");
    System.err.println("  to get fine grained control on what is counted..");
    System.err.println("   -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
    System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +
        "string : used to separate the rowId/column family name and qualifier name.");
    System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +
        "operation to a limited subset of rows from the table based on regex or prefix pattern.");
    System.exit(-1);
  }
  Job job = createSubmittableJob(conf, otherArgs);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example 10
Project: Hadoop-Codes   File: testDriver.java
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	Job job = Job.getInstance(conf, "test");
	
	job.setMapperClass(testMapper.class);
	job.setPartitionerClass(testPartitioner.class);
	job.setReducerClass(testReducer.class);
	job.setNumReduceTasks(10);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(IntWritable.class);
	
	FileInputFormat.setInputPaths(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	if (!job.waitForCompletion(true))
		return;
}
 
Example 11
Project: aliyun-maxcompute-data-collectors   File: MergeJob.java
private void configueAvroMergeJob(Configuration conf, Job job, Path oldPath, Path newPath)
    throws IOException {
  LOG.info("Trying to merge avro files");
  final Schema oldPathSchema = AvroUtil.getAvroSchema(oldPath, conf);
  final Schema newPathSchema = AvroUtil.getAvroSchema(newPath, conf);
  if (oldPathSchema == null || newPathSchema == null || !oldPathSchema.equals(newPathSchema)) {
    throw new IOException("Invalid schema for input directories. Schema for old data: ["
        + oldPathSchema + "]. Schema for new data: [" + newPathSchema + "]");
  }
  LOG.debug("Avro Schema:" + oldPathSchema);
  job.setInputFormatClass(AvroInputFormat.class);
  job.setOutputFormatClass(AvroOutputFormat.class);
  job.setMapperClass(MergeAvroMapper.class);
  job.setReducerClass(MergeAvroReducer.class);
  AvroJob.setOutputSchema(job.getConfiguration(), oldPathSchema);
}
 
Example 12
Project: ditb   File: SampleUploader.java
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
  Path inputPath = new Path(args[0]);
  String tableName = args[1];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(Uploader.class);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(Uploader.class);
  // No reducers.  Just write straight to table.  Call initTableReducerJob
  // because it sets up the TableOutputFormat.
  TableMapReduceUtil.initTableReducerJob(tableName, null, job);
  job.setNumReduceTasks(0);
  return job;
}
 
Example 13
Project: ditb   File: TestTableInputFormat.java
void testInputFormat(Class<? extends InputFormat> clazz)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
  job.setInputFormatClass(clazz);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  assertTrue("job failed!", job.waitForCompletion(true));
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
  assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
  assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
  assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
}
 
Example 14
Project: ditb   File: TestHFileOutputFormat2.java
private void runIncrementalPELoad(Configuration conf, HTableDescriptor tableDescriptor,
    RegionLocator regionLocator, Path outDir) throws IOException, UnsupportedEncodingException,
    InterruptedException, ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());
  setupRandomGeneratorMapper(job);
  HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  FileOutputFormat.setOutputPath(job, outDir);

  assertFalse(util.getTestFileSystem().exists(outDir));

  assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());

  assertTrue(job.waitForCompletion(true));
}
 
Example 15
Project: ditb   File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
      InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
 
Example 16
Project: aliyun-maxcompute-data-collectors   File: ExportJobBase.java
@Override
protected void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {

  if (options.getOdpsTable() != null) {
    Configuration conf = job.getConfiguration();
    setInputFormatClass(OdpsExportInputFormat.class);
    conf.set(OdpsConstants.TABLE_NAME, options.getOdpsTable());
    conf.set(OdpsConstants.ACCESS_ID, options.getOdpsAccessID());
    conf.set(OdpsConstants.ACCESS_KEY, options.getOdpsAccessKey());
    conf.set(OdpsConstants.ENDPOINT, options.getOdpsEndPoint());
    conf.set(OdpsConstants.PROJECT, options.getOdpsProject());
    String partitionSpec = options.getOdpsPartitionSpec();
    if (partitionSpec != null) {
      conf.set(OdpsConstants.PARTITION_SPEC, partitionSpec);
    }
    setMapperClass(OdpsExportMapper.class);
  }
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
  if (!isHCatJob && options.getOdpsTable() == null) {
    FileInputFormat.addInputPath(job, getInputPath());
  }

}
 
Example 17
Project: hadoop   File: TestCLI.java
@Test
public void testListAttemptIdsWithInvalidInputs() throws Exception {
  JobID jobId = JobID.forName(jobIdStr);
  Cluster mockCluster = mock(Cluster.class);
  Job job = mock(Job.class);
  CLI cli = spy(new CLI());

  doReturn(mockCluster).when(cli).createCluster();
  when(mockCluster.getJob(jobId)).thenReturn(job);

  int retCode_JOB_SETUP = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "JOB_SETUP", "running" });

  int retCode_JOB_CLEANUP = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "JOB_CLEANUP", "running" });

  int retCode_invalidTaskState = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "REDUCE", "complete" });

  assertEquals("JOB_SETUP is an invalid input,exit code should be -1", -1,
      retCode_JOB_SETUP);
  assertEquals("JOB_CLEANUP is an invalid input,exit code should be -1", -1,
      retCode_JOB_CLEANUP);
  assertEquals("complete is an invalid input,exit code should be -1", -1,
      retCode_invalidTaskState);

}
 
Example 18
Project: hadoop   File: TestJobCounters.java
@Test
public void testNewCounterC() throws Exception {
  final Job job = createJob();
  final Configuration conf = job.getConfiguration();
  conf.setInt(JobContext.IO_SORT_FACTOR, 3);
  createWordsFile(inFiles[3], conf);
  createWordsFile(inFiles[4], conf);
  long inputSize = 0;
  inputSize += getFileSize(inFiles[0]);
  inputSize += getFileSize(inFiles[1]);
  inputSize += getFileSize(inFiles[2]);
  inputSize += getFileSize(inFiles[3]);
  inputSize += getFileSize(inFiles[4]);
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(
      job, IN_DIR);
  org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(
      job, new Path(OUT_DIR, "outputN2"));
  assertTrue(job.waitForCompletion(true));
  final Counters c1 = Counters.downgrade(job.getCounters());
  validateCounters(c1, 122880, 25600, 102400);
  validateFileCounters(c1, inputSize, 0, 0, 0);
}
 
Example 19
Project: hadoop   File: DataDrivenDBInputFormat.java
/** Note that the "orderBy" column is called the "splitBy" in this version.
  * We reuse the same field, but it's not strictly ordering it -- just partitioning
  * the results.
  */
public static void setInput(Job job, 
    Class<? extends DBWritable> inputClass,
    String tableName,String conditions, 
    String splitBy, String... fieldNames) {
  DBInputFormat.setInput(job, inputClass, tableName, conditions, splitBy, fieldNames);
  job.setInputFormatClass(DataDrivenDBInputFormat.class);
}
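For context, a caller typically registers the JDBC connection on the job before invoking this helper. The following is a hedged usage sketch, not code from the Hadoop source: EmployeeRecord stands in for a hypothetical DBWritable implementation, and the driver class, connection URL, credentials, and table/column names are all placeholders.

// EmployeeRecord is a hypothetical DBWritable implementation (not shown here).
Job job = Job.getInstance(new Configuration(), "db-import");
DBConfiguration.configureDB(job.getConfiguration(),
    "com.mysql.jdbc.Driver",            // JDBC driver class (placeholder)
    "jdbc:mysql://localhost/mydb",      // connection URL (placeholder)
    "user", "password");                // credentials (placeholders)
DataDrivenDBInputFormat.setInput(job, EmployeeRecord.class,
    "employees",                        // table name (placeholder)
    null,                               // no WHERE conditions
    "id",                               // "splitBy" column used to partition the query
    "id", "name");                      // columns to read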
 
Example 20
Project: hadoop   File: ValueAggregatorJob.java
public static JobControl createValueAggregatorJobs(String args[],
  Class<? extends ValueAggregatorDescriptor>[] descriptors) 
throws IOException {
  
  JobControl theControl = new JobControl("ValueAggregatorJobs");
  ArrayList<ControlledJob> dependingJobs = new ArrayList<ControlledJob>();
  Configuration conf = new Configuration();
  if (descriptors != null) {
    conf = setAggregatorDescriptors(descriptors);
  }
  Job job = createValueAggregatorJob(conf, args);
  ControlledJob cjob = new ControlledJob(job, dependingJobs);
  theControl.addJob(cjob);
  return theControl;
}
 
Example 21
Project: hadoop   File: SecondarySort.java
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: secondarysort <in> <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "secondary sort");
  job.setJarByClass(SecondarySort.class);
  job.setMapperClass(MapClass.class);
  job.setReducerClass(Reduce.class);

  // group and partition by the first int in the pair
  job.setPartitionerClass(FirstPartitioner.class);
  job.setGroupingComparatorClass(FirstGroupingComparator.class);

  // the map output is IntPair, IntWritable
  job.setMapOutputKeyClass(IntPair.class);
  job.setMapOutputValueClass(IntWritable.class);

  // the reduce output is Text, IntWritable
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example 22
Project: DocIT   File: Total.java
public static void total(String name, String in, String out)
		throws IOException, InterruptedException, ClassNotFoundException {
	Configuration conf = new Configuration();
	conf.set(QUERIED_NAME, name);
	Job job = Job.getInstance(new Cluster(conf), conf);
	job.setJarByClass(Total.class);

	// in
	if (!in.endsWith("/"))
		in = in.concat("/");
	in = in.concat("employees");
	SequenceFileInputFormat.addInputPath(job, new Path(in));
	job.setInputFormatClass(SequenceFileInputFormat.class);

	// map
	job.setMapperClass(TotalMapper.class);
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(DoubleWritable.class);

	// reduce
	job.setCombinerClass(TotalReducer.class);
	job.setReducerClass(TotalReducer.class);

	// out
	SequenceFileOutputFormat.setOutputPath(job, new Path(out));
	job.setOutputFormatClass(SequenceFileOutputFormat.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);

	job.waitForCompletion(true);
}
 
Example 23
Project: LDA   File: InitDriver.java
public static void run(Configuration conf, Path[] inputPath, Path outputPath) throws IOException, ClassNotFoundException, InterruptedException {
        String jobName = "init matrix";
        Job job = new Job(conf, jobName);

        job.setMapOutputKeyClass(twoDimensionIndexWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(twoDimensionIndexWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setMapperClass(InitMapper.class);
        job.setReducerClass(InitReducer.class);
        job.setNumReduceTasks(1);

        for(Path path : inputPath) {
            FileInputFormat.addInputPath(job, path);
        }
        Path output = new Path(outputPath, "initDir");
        FileOutputFormat.setOutputPath(job, output);

        job.setJarByClass(LDADriver.class);
        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Init failed");
        }
}
 
Example 24
Project: ditb   File: WALPlayer.java
@Override
public int run(String[] args) throws Exception {
  String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
  if (otherArgs.length < 2) {
    usage("Wrong number of arguments: " + otherArgs.length);
    System.exit(-1);
  }
  Job job = createSubmittableJob(otherArgs);
  return job.waitForCompletion(true) ? 0 : 1;
}
 
Example 25
Project: ditb   File: TestTableInputFormatScanBase.java
/**
 * Tests a MR scan using specific start and stop rows.
 *
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
protected void testScan(String start, String stop, String last)
throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
    "To" + (stop != null ? stop.toUpperCase() : "Empty");
  LOG.info("Before map/reduce startup - job " + jobName);
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILY);
  if (start != null) {
    scan.setStartRow(Bytes.toBytes(start));
  }
  c.set(KEY_STARTROW, start != null ? start : "");
  if (stop != null) {
    scan.setStopRow(Bytes.toBytes(stop));
  }
  c.set(KEY_LASTROW, last != null ? last : "");
  LOG.info("scan before: " + scan);
  Job job = new Job(c, jobName);
  TableMapReduceUtil.initTableMapperJob(
    Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
    ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  job.setReducerClass(ScanReducer.class);
  job.setNumReduceTasks(1); // one to get final "first" and "last" key
  FileOutputFormat.setOutputPath(job,
      new Path(TEST_UTIL.getDataTestDir(), job.getJobName()));
  LOG.info("Started " + job.getJobName());
  assertTrue(job.waitForCompletion(true));
  LOG.info("After map/reduce completion - job " + jobName);
}
 
Example 26
Project: ditb   File: TableMapReduceUtil.java
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table  The table name to read from.
 * @param scan  The scan instance with the columns, time range etc.
 * @param mapper  The mapper class to use.
 * @param outputKeyClass  The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job  The current job to adjust.  Make sure the passed job is
 * carrying all necessary HBase configuration.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(TableName table,
    Scan scan,
    Class<? extends TableMapper> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass,
    Job job) throws IOException {
  initTableMapperJob(table.getNameAsString(),
      scan,
      mapper,
      outputKeyClass,
      outputValueClass,
      job,
      true);
}
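As a usage illustration (not taken from the HBase source), a hedged sketch of calling this overload for a map-only scan might look like the following; the table name and column family are placeholders, and the stock HBase IdentityTableMapper serves as the mapper:

Configuration conf = HBaseConfiguration.create();
Job job = Job.getInstance(conf, "table-scan");
Scan scan = new Scan();
scan.addFamily(Bytes.toBytes("cf"));              // limit the scan to one column family (placeholder)
TableMapReduceUtil.initTableMapperJob(
    TableName.valueOf("myTable"),                 // table to read (placeholder)
    scan,
    IdentityTableMapper.class,                    // stock HBase pass-through mapper
    ImmutableBytesWritable.class,                 // map output key class
    Result.class,                                 // map output value class
    job);
job.setNumReduceTasks(0);                         // map-only job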
 
Example 27
Project: big_data   File: ActiveUserRunner.java
/**
 * Initialize the list of Scans.
 * 
 * @param job
 * @return
 */
private List<Scan> initScans(Job job) {
	Configuration conf = job.getConfiguration();
	// Get the run date: yyyy-MM-dd
	String date = conf.get(GlobalConstants.RUNNING_DATE_PARAMES);
	long startDate = TimeUtil.parseString2Long(date);
	long endDate = startDate + GlobalConstants.DAY_OF_MILLISECONDS;

	Scan scan = new Scan();
	// Define the start and stop rowkeys for the HBase scan
	scan.setStartRow(Bytes.toBytes("" + startDate));
	scan.setStopRow(Bytes.toBytes("" + endDate));

	FilterList filterList = new FilterList();
	// Column names the mapper needs to read
	String[] columns = new String[] { EventLogConstants.LOG_COLUMN_NAME_UUID, // user id
			EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME, // server time
			EventLogConstants.LOG_COLUMN_NAME_PLATFORM, // platform name
			EventLogConstants.LOG_COLUMN_NAME_BROWSER_NAME, // browser name
			EventLogConstants.LOG_COLUMN_NAME_BROWSER_VERSION // browser version
	};
	filterList.addFilter(this.getColumnFilter(columns));

	scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(EventLogConstants.HBASE_NAME_EVENT_LOGS));
	scan.setFilter(filterList);
	return Lists.newArrayList(scan);
}
 
Example 28
Project: hadoop   File: TestBinaryTokenFile.java
private void setupBinaryTokenFile(Job job) {
// Credentials in the job will not have delegation tokens
// because security is disabled. Fetch delegation tokens
// and store in binary token file.
  createBinaryTokenFile(job.getConfiguration());
  job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY,
      binaryTokenFileName.toString());
  // NB: the MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY
  // key now gets deleted from config,
  // so it's not accessible in the job's config. So,
  // we use another key to pass the file name into the job configuration:
  job.getConfiguration().set(KEY_SECURITY_TOKEN_FILE_NAME,
      binaryTokenFileName.toString());
}
 
Example 29
Project: hadoop   File: TestCLI.java
@Test
public void testListAttemptIdsWithValidInput() throws Exception {
  JobID jobId = JobID.forName(jobIdStr);
  Cluster mockCluster = mock(Cluster.class);
  Job job = mock(Job.class);
  CLI cli = spy(new CLI());

  doReturn(mockCluster).when(cli).createCluster();
  when(job.getTaskReports(TaskType.MAP)).thenReturn(
      getTaskReports(jobId, TaskType.MAP));
  when(job.getTaskReports(TaskType.REDUCE)).thenReturn(
      getTaskReports(jobId, TaskType.REDUCE));
  when(mockCluster.getJob(jobId)).thenReturn(job);

  int retCode_MAP = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "MAP", "running" });
  // testing case insensitive behavior
  int retCode_map = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "map", "running" });

  int retCode_REDUCE = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "REDUCE", "running" });

  int retCode_completed = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "REDUCE", "completed" });

  assertEquals("MAP is a valid input,exit code should be 0", 0, retCode_MAP);
  assertEquals("map is a valid input,exit code should be 0", 0, retCode_map);
  assertEquals("REDUCE is a valid input,exit code should be 0", 0,
      retCode_REDUCE);
  assertEquals(
      "REDUCE and completed are a valid inputs to -list-attempt-ids,exit code should be 0",
      0, retCode_completed);

  verify(job, times(2)).getTaskReports(TaskType.MAP);
  verify(job, times(2)).getTaskReports(TaskType.REDUCE);
}
 
Example 30
Project: hadoop   File: MultiFileWordCount.java
public int run(String[] args) throws Exception {

    if(args.length < 2) {
      printUsage();
      return 2;
    }

    Job job = Job.getInstance(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);

    //set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);
    
    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);

    //use the defined mapper
    job.setMapperClass(MapClass.class);
    //use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
  }
 
Example 31
Project: ditb   File: TestHFileOutputFormat2.java
/**
 * Run small MR job.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
public void testWritingPEData() throws Exception {
  Configuration conf = util.getConfiguration();
  Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
  FileSystem fs = testDir.getFileSystem(conf);

  // Set down this value or we OOME in eclipse.
  conf.setInt("mapreduce.task.io.sort.mb", 20);
  // Write a few files.
  conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

  Job job = new Job(conf, "testWritingPEData");
  setupRandomGeneratorMapper(job);
  // This partitioner doesn't work well for number keys but using it anyways
  // just to demonstrate how to configure it.
  byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
  byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

  Arrays.fill(startKey, (byte)0);
  Arrays.fill(endKey, (byte)0xff);

  job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
  // Set start and end rows for partitioner.
  SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
  SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
  job.setReducerClass(KeyValueSortReducer.class);
  job.setOutputFormatClass(HFileOutputFormat2.class);
  job.setNumReduceTasks(4);
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());

  FileOutputFormat.setOutputPath(job, testDir);
  assertTrue(job.waitForCompletion(false));
  FileStatus [] files = fs.listStatus(testDir);
  assertTrue(files.length > 0);
}
 
Example 32
Project: hadoop   File: DistCp.java
/**
 * Create and submit the mapreduce job.
 * @return The mapreduce job object that has been submitted
 */
public Job createAndSubmitJob() throws Exception {
  assert inputOptions != null;
  assert getConf() != null;
  Job job = null;
  try {
    synchronized(this) {
      //Don't cleanup while we are setting up.
      metaFolder = createMetaFolderPath();
      jobFS = metaFolder.getFileSystem(getConf());
      job = createJob();
    }
    if (inputOptions.shouldUseDiff()) {
      if (!DistCpSync.sync(inputOptions, getConf())) {
        inputOptions.disableUsingDiff();
      }
    }
    createInputFileListing(job);

    job.submit();
    submitted = true;
  } finally {
    if (!submitted) {
      cleanup();
    }
  }

  String jobID = job.getJobID().toString();
  job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID);
  LOG.info("DistCp job-id: " + jobID);

  return job;
}
 
Example 33
Project: hadoop   File: FailJob.java
public int run(String[] args) throws Exception {
  if(args.length < 1) {
    System.err.println("FailJob " +
        " (-failMappers|-failReducers)");
    ToolRunner.printGenericCommandUsage(System.err);
    return 2;
  }
  boolean failMappers = false, failReducers = false;

  for (int i = 0; i < args.length; i++ ) {
    if (args[i].equals("-failMappers")) {
      failMappers = true;
    }
    else if(args[i].equals("-failReducers")) {
      failReducers = true;
    }
  }
  if (!(failMappers ^ failReducers)) {
    System.err.println("Exactly one of -failMappers or -failReducers must be specified.");
    return 3;
  }

  // Write a file with one line per mapper.
  final FileSystem fs = FileSystem.get(getConf());
  Path inputDir = new Path(FailJob.class.getSimpleName() + "_in");
  fs.mkdirs(inputDir);
  for (int i = 0; i < getConf().getInt("mapred.map.tasks", 1); ++i) {
    BufferedWriter w = new BufferedWriter(new OutputStreamWriter(
        fs.create(new Path(inputDir, Integer.toString(i)))));
    w.write(Integer.toString(i) + "\n");
    w.close();
  }

  Job job = createJob(failMappers, failReducers, inputDir);
  return job.waitForCompletion(true) ? 0 : 1;
}
 
Example 34
Project: ditb   File: TableMapReduceUtil.java
/**
 * Use this before submitting a Multi TableMap job. It will appropriately set
 * up the job.
 *
 * @param scans The list of {@link Scan} objects to read from.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is carrying
 *          all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the
 *          configured job classes via the distributed cache (tmpjars).
 * @param initCredentials whether to initialize hbase auth credentials for the job
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(List<Scan> scans,
    Class<? extends TableMapper> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job,
    boolean addDependencyJars,
    boolean initCredentials) throws IOException {
  job.setInputFormatClass(MultiTableInputFormat.class);
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (outputKeyClass != null) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  job.setMapperClass(mapper);
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  List<String> scanStrings = new ArrayList<String>();

  for (Scan scan : scans) {
    scanStrings.add(convertScanToString(scan));
  }
  job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
    scanStrings.toArray(new String[scanStrings.size()]));

  if (addDependencyJars) {
    addDependencyJars(job);
  }

  if (initCredentials) {
    initCredentials(job);
  }
}
 
Example 35
Project: ditb   File: IntegrationTestLoadAndVerify.java
@Override
public Job createSubmittableJob(String[] args) throws IOException {
  Job job = super.createSubmittableJob(args);
  // Call my class instead.
  job.setJarByClass(WALMapperSearcher.class);
  job.setMapperClass(WALMapperSearcher.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  return job;
}
 
Example 36
Project: mapreduce-samples   File: UnitSum.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitSum.class);
        job.setMapperClass(PassMapper.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    }
 
Example 37
Project: ditb   File: TestTableMapReduceUtil.java
@Test
public void testInitTableMapperJob1() throws Exception {
  Configuration configuration = new Configuration();
  Job job = new Job(configuration, "tableName");
  // test 
  TableMapReduceUtil.initTableMapperJob("Table", new Scan(), Import.Importer.class, Text.class,
      Text.class, job, false, WALInputFormat.class);
  assertEquals(WALInputFormat.class, job.getInputFormatClass());
  assertEquals(Import.Importer.class, job.getMapperClass());
  assertEquals(LongWritable.class, job.getOutputKeyClass());
  assertEquals(Text.class, job.getOutputValueClass());
  assertNull(job.getCombinerClass());
  assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
}
 
Example 38
Project: aliyun-maxcompute-data-collectors   File: HdfsOdpsImportJob.java
private DatasetDescriptor getDatasetDescriptorFromParquetFile(Job job, FileSystem fs, String uri)
    throws IOException {

  ArrayList<FileStatus> files = new ArrayList<FileStatus>();
  FileStatus[] dirs;
  dirs = fs.globStatus(fs.makeQualified(getInputPath()));
  for (int i = 0; (dirs != null && i < dirs.length); i++) {
    files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath(), HIDDEN_FILES_PATH_FILTER)));
    // We only check one file, so exit the loop when we have at least
    // one.
    if (files.size() > 0) {
      break;
    }
  }

  ParquetMetadata parquetMetadata;
  try {
    parquetMetadata =
        ParquetFileReader.readFooter(job.getConfiguration(),
            fs.makeQualified(files.get(0).getPath()));
  } catch (IOException e) {
    LOG.error("Wrong file format. Please check the export file's format.", e);
    throw e;
  }
  MessageType schema = parquetMetadata.getFileMetaData().getSchema();
  Schema avroSchema = new AvroSchemaConverter().convert(schema);
  DatasetDescriptor descriptor =
      new DatasetDescriptor.Builder().schema(avroSchema).format(Formats.PARQUET)
          .compressionType(ParquetJob.getCompressionType(job.getConfiguration())).build();
  return descriptor;
}
 
Example 39
Project: ditb   File: MapreduceTestingShim.java
@Override
public Job newJob(Configuration conf) throws IOException {
  // Implementing:
  // return new Job(conf);
  Constructor<Job> c;
  try {
    c = Job.class.getConstructor(Configuration.class);
    return c.newInstance(conf);
  } catch (Exception e) {
    throw new IllegalStateException(
        "Failed to instantiate new Job(conf)", e);
  }
}
 
Example 40
Project: mumu-mapreduce   File: MaxTemperatureMapReduce.java
public static void main(String[] args) {
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }

    String temperatureInput = args[0];
    String temperatureOutput = args[1];
    try {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(MaxTemperatureMapReduce.class);
        job.setJobName("MaxTemperature");
        FileInputFormat.addInputPath(job, new Path(temperatureInput));
        FileOutputFormat.setOutputPath(job, new Path(temperatureOutput));

        job.setMapperClass(MaxTemperatureMapper.class);
        job.setReducerClass(MaxTemperatureReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        e.printStackTrace();
    }
}
 
Example 41
Project: hadoop   File: MultithreadedMapper.java
/**
 * Set the application's mapper class.
 * @param <K1> the map input key type
 * @param <V1> the map input value type
 * @param <K2> the map output key type
 * @param <V2> the map output value type
 * @param job the job to modify
 * @param cls the class to use as the mapper
 */
public static <K1,V1,K2,V2> 
void setMapperClass(Job job, 
                    Class<? extends Mapper<K1,V1,K2,V2>> cls) {
  if (MultithreadedMapper.class.isAssignableFrom(cls)) {
    throw new IllegalArgumentException("Can't have recursive " + 
                                       "MultithreadedMapper instances.");
  }
  job.getConfiguration().setClass(MAP_CLASS, cls, Mapper.class);
}
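A hedged usage sketch (not from the Hadoop source) of wiring this up: the job's mapper class is set to MultithreadedMapper itself, and the helpers below configure which mapper each worker thread runs and how many threads to use. The identity base Mapper is used here purely as a placeholder.

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "multithreaded-map");
// The map task runs MultithreadedMapper, which fans records out to worker threads.
job.setMapperClass(MultithreadedMapper.class);
// The mapper each worker thread executes (identity Mapper as a placeholder).
MultithreadedMapper.setMapperClass(job, Mapper.class);
// Number of concurrent threads per map task.
MultithreadedMapper.setNumberOfThreads(job, 4);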
 
Example 42
Project: aliyun-maxcompute-data-collectors   File: HBaseBulkImportJob.java
@Override
protected void jobTeardown(Job job) throws IOException, ImportException {
  super.jobTeardown(job);
  // Delete the hfiles directory after we are finished.
  FileSystem fileSystem = FileSystem.get(job.getConfiguration());
  fileSystem.delete(getContext().getDestination(), true);
}
 
Example 43
Project: hadoop   File: TestCombineTextInputFormat.java
/**
 * Test using the gzip codec for reading
 */
@Test(timeout=10000)
public void testGzip() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, conf);
  localFs.delete(workDir, true);
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
            "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
            "this is a test\nof gzip\n");
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, workDir);
  CombineTextInputFormat format = new CombineTextInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  assertEquals("compressed splits == 1", 1, splits.size());
  List<Text> results = readSplit(format, splits.get(0), job);
  assertEquals("splits[0] length", 8, results.size());

  final String[] firstList =
    {"the quick", "brown", "fox jumped", "over", " the lazy", " dog"};
  final String[] secondList = {"this is a test", "of gzip"};
  String first = results.get(0).toString();
  if (first.equals(firstList[0])) {
    testResults(results, firstList, secondList);
  } else if (first.equals(secondList[0])) {
    testResults(results, secondList, firstList);
  } else {
    fail("unexpected first token!");
  }
}
 
Example 44
Project: aliyun-maxcompute-data-collectors   File: MySQLDumpImportJob.java
/**
 * Set the mapper class implementation to use in the job,
 * as well as any related configuration (e.g., map output types).
 */
protected void configureMapper(Job job, String tableName,
    String tableClassName) throws ClassNotFoundException, IOException {
  job.setMapperClass(getMapperClass());
  job.setOutputKeyClass(String.class);
  job.setOutputValueClass(NullWritable.class);
}
 
Example 45
Project: hadoop   File: TestMiniMRClientCluster.java
@Test
public void testJob() throws Exception {
  final Job job = createJob();
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(job,
      inDir);
  org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job,
      new Path(outDir, "testJob"));
  assertTrue(job.waitForCompletion(true));
  validateCounters(job.getCounters(), 5, 25, 5, 5);
}
 
Example 46
Project: hadoop   File: ExecutionSummarizer.java
private void processJobState(JobStats stats) {
  Job job = stats.getJob();
  try {
    if (job.isSuccessful()) {
      ++totalSuccessfulJobs;
    } else {
      ++totalFailedJobs;
    }
  } catch (Exception e) {
    // this behavior is consistent with job-monitor which marks the job as 
    // complete (lost) if the status polling bails out
    ++totalLostJobs; 
  }
}
 
Example 47
Project: hadoop   File: TestUberAM.java
@Override
protected void verifyRandomWriterCounters(Job job)
    throws InterruptedException, IOException {
  super.verifyRandomWriterCounters(job);
  Counters counters = job.getCounters();
  Assert.assertEquals(3, counters.findCounter(JobCounter.NUM_UBER_SUBMAPS)
      .getValue());
  Assert.assertEquals(3,
      counters.findCounter(JobCounter.TOTAL_LAUNCHED_UBERTASKS).getValue());
}
 
Example 48
Project: aliyun-maxcompute-data-collectors   File: ImportJobBase.java
/**
 * Actually run the MapReduce job.
 */
@Override
protected boolean runJob(Job job) throws ClassNotFoundException, IOException,
    InterruptedException {

  PerfCounters perfCounters = new PerfCounters();
  perfCounters.startClock();

  boolean success = doSubmitJob(job);

  if (isHCatJob) {
    SqoopHCatUtilities.instance().invokeOutputCommitterForLocalMode(job);
  }

  perfCounters.stopClock();

  Counters jobCounters = job.getCounters();
  // If the job has been retired, these may be unavailable.
  if (null == jobCounters) {
    displayRetiredJobNotice(LOG);
  } else {
    perfCounters.addBytes(jobCounters.getGroup("FileSystemCounters")
      .findCounter("HDFS_BYTES_WRITTEN").getValue());
    LOG.info("Transferred " + perfCounters.toString());
    long numRecords = ConfigurationHelper.getNumMapOutputRecords(job);
    LOG.info("Retrieved " + numRecords + " records.");
  }
  return success;
}
 
Example 49
Project: hadoop   File: TestMRFieldSelection.java
public static void launch() throws Exception {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  int numOfInputLines = 10;

  Path outDir = new Path(testDir, "output_for_field_selection_test");
  Path inDir = new Path(testDir, "input_for_field_selection_test");

  StringBuffer inputData = new StringBuffer();
  StringBuffer expectedOutput = new StringBuffer();
  constructInputOutputData(inputData, expectedOutput, numOfInputLines);
  
  conf.set(FieldSelectionHelper.DATA_FIELD_SEPERATOR, "-");
  conf.set(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "6,5,1-3:0-");
  conf.set(
    FieldSelectionHelper.REDUCE_OUTPUT_KEY_VALUE_SPEC, ":4,3,2,1,0,0-");
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir,
    1, 1, inputData.toString());
  job.setMapperClass(FieldSelectionMapper.class);
  job.setReducerClass(FieldSelectionReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);

  job.waitForCompletion(true);
  assertTrue("Job Failed!", job.isSuccessful());

  //
  // Finally, we compare the reconstructed answer key with the
  // original one.  Remember, we need to ignore zero-count items
  // in the original key.
  //
  String outdata = MapReduceTestUtil.readOutput(outDir, conf);
  assertEquals("Outputs doesnt match.",expectedOutput.toString(), outdata);
  fs.delete(outDir, true);
}
 
Example 50
Project: aliyun-maxcompute-data-collectors   File: PGBulkloadExportJob.java
@Override
protected void propagateOptionsToJob(Job job) {
  super.propagateOptionsToJob(job);
  SqoopOptions opts = context.getOptions();
  Configuration conf = job.getConfiguration();
  conf.setIfUnset("pgbulkload.bin", "pg_bulkload");
  if (opts.getNullStringValue() != null) {
    conf.set("pgbulkload.null.string", opts.getNullStringValue());
  }
  setDelimiter("pgbulkload.input.field.delim",
               opts.getInputFieldDelim(),
               conf);
  setDelimiter("pgbulkload.input.record.delim",
               opts.getInputRecordDelim(),
               conf);
  setDelimiter("pgbulkload.input.enclosedby",
               opts.getInputEnclosedBy(),
               conf);
  setDelimiter("pgbulkload.input.escapedby",
               opts.getInputEscapedBy(),
               conf);
  conf.setBoolean("pgbulkload.input.encloserequired",
                  opts.isInputEncloseRequired());
  conf.setIfUnset("pgbulkload.check.constraints", "YES");
  conf.setIfUnset("pgbulkload.parse.errors", "INFINITE");
  conf.setIfUnset("pgbulkload.duplicate.errors", "INFINITE");
  conf.set("mapred.jar", context.getJarFile());
  conf.setBoolean("mapred.map.tasks.speculative.execution", false);
  conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
  conf.setInt("mapred.map.max.attempts", 1);
  conf.setInt("mapred.reduce.max.attempts", 1);
}
 
Example 51
Project: ditb   File: TestTableMapReduceUtil.java
@Test
public void testInitTableMapperJob2() throws Exception {
  Configuration configuration = new Configuration();
  Job job = new Job(configuration, "tableName");
  TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("Table"), new Scan(),
      Import.Importer.class, Text.class, Text.class, job, false, WALInputFormat.class);
  assertEquals(WALInputFormat.class, job.getInputFormatClass());
  assertEquals(Import.Importer.class, job.getMapperClass());
  assertEquals(LongWritable.class, job.getOutputKeyClass());
  assertEquals(Text.class, job.getOutputValueClass());
  assertNull(job.getCombinerClass());
  assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
}
 
Example 52
Project: ditb   File: IndexBuilder.java
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if(otherArgs.length < 3) {
    System.err.println("Only " + otherArgs.length + " arguments supplied, required: 3");
    System.err.println("Usage: IndexBuilder <TABLE_NAME> <COLUMN_FAMILY> <ATTR> [<ATTR> ...]");
    System.exit(-1);
  }
  Job job = configureJob(conf, otherArgs);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example 53
Project: ditb   File: VerifyReplication.java
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  Job job = createSubmittableJob(conf, args);
  if (job != null) {
    return job.waitForCompletion(true) ? 0 : 1;
  } 
  return 1;
}
 
Example 54
Project: aliyun-maxcompute-data-collectors   File: ConfigurationHelper.java
/**
 * Set the (hinted) number of map tasks for a job.
 */
public static void setJobNumMaps(Job job, int numMapTasks) {
  if (isLocalJobTracker(job.getConfiguration())) {
    numLocalModeMaps = numMapTasks;
  } else {
    job.getConfiguration().setInt(
      ConfigurationConstants.PROP_MAPRED_MAP_TASKS, numMapTasks);
  }
}
 
Example 55
Project: aliyun-maxcompute-data-collectors   File: JobBase.java
/**
 * Configure the number of map tasks to use in the job.
 */
protected int configureNumMapTasks(Job job) throws IOException {
  int numMapTasks = options.getNumMappers();
  if (numMapTasks < 1) {
    numMapTasks = SqoopOptions.DEFAULT_NUM_MAPPERS;
    LOG.warn("Invalid mapper count; using " + numMapTasks + " mappers.");
  }
  ConfigurationHelper.setJobNumMaps(job, numMapTasks);
  return numMapTasks;
}
 
Example 56
Project: hadoop   File: TestMapReduceJobControlWithMocks.java
private Job createJob(boolean complete, boolean successful)
	throws IOException, InterruptedException {
  // Create a stub Job that responds in a controlled way
  Job mockJob = mock(Job.class);
  when(mockJob.getConfiguration()).thenReturn(new Configuration());
  when(mockJob.isComplete()).thenReturn(complete);
  when(mockJob.isSuccessful()).thenReturn(successful);
  return mockJob;
}
 
Example 57
Project: hadoop   File: SleepJob.java
@Override
public Job call()
  throws IOException, InterruptedException, ClassNotFoundException {
  ugi.doAs(
    new PrivilegedExceptionAction<Job>() {
      public Job run()
        throws IOException, ClassNotFoundException, InterruptedException {
        job.setMapperClass(SleepMapper.class);
        job.setReducerClass(SleepReducer.class);
        job.setNumReduceTasks((mapTasksOnly) ? 0 : jobdesc.getNumberReduces());
        job.setMapOutputKeyClass(GridmixKey.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setSortComparatorClass(GridmixKey.Comparator.class);
        job.setGroupingComparatorClass(SpecGroupingComparator.class);
        job.setInputFormatClass(SleepInputFormat.class);
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setPartitionerClass(DraftPartitioner.class);
        job.setJarByClass(SleepJob.class);
        job.getConfiguration().setBoolean(Job.USED_GENERIC_PARSER, true);
        job.submit();
        return job;

      }
    });

  return job;
}
 
Example 58
Project: aliyun-maxcompute-data-collectors   File: DBOutputFormat.java
/**
 * Initializes the reduce-part of the job with
 * the appropriate output settings.
 *
 * @param job The job
 * @param tableName The table to insert data into
 * @param fieldNames The field names in the table.
 */
public static void setOutput(Job job, String tableName,
    String... fieldNames) throws IOException {
  if (fieldNames.length > 0 && fieldNames[0] != null) {
    DBConfiguration dbConf = setOutput(job, tableName);
    dbConf.setOutputFieldNames(fieldNames);
  } else {
    if (fieldNames.length > 0) {
      setOutput(job, tableName, fieldNames.length);
    } else {
      throw new IllegalArgumentException(
          "Field names must be greater than 0");
    }
  }
}
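For reference, Hadoop's stock org.apache.hadoop.mapreduce.lib.db.DBOutputFormat exposes a setOutput helper with the same shape; a hedged usage sketch (driver class, connection URL, credentials, and table/column names are placeholders) could look like this:

Job job = Job.getInstance(new Configuration(), "db-export");
DBConfiguration.configureDB(job.getConfiguration(),
    "org.postgresql.Driver",                 // JDBC driver class (placeholder)
    "jdbc:postgresql://localhost/mydb",      // connection URL (placeholder)
    "user", "password");                     // credentials (placeholders)
job.setOutputFormatClass(DBOutputFormat.class);
// Reduce output is written into the "employees" table's "id" and "name" columns (placeholders).
DBOutputFormat.setOutput(job, "employees", "id", "name");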
 
Example 59
Project: hadoop   File: TestCombineFileInputFormat.java
/**
 * Test when input files are from non-default file systems
 */
@Test
public void testForNonDefaultFileSystem() throws Throwable {
  Configuration conf = new Configuration();

  // use a fake file system scheme as default
  conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, DUMMY_FS_URI);

  // default fs path
  assertEquals(DUMMY_FS_URI, FileSystem.getDefaultUri(conf).toString());
  // add a local file
  Path localPath = new Path("testFile1");
  FileSystem lfs = FileSystem.getLocal(conf);
  FSDataOutputStream dos = lfs.create(localPath);
  dos.writeChars("Local file for CFIF");
  dos.close();

  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, lfs.makeQualified(localPath));
  DummyInputFormat inFormat = new DummyInputFormat();
  List<InputSplit> splits = inFormat.getSplits(job);
  assertTrue(splits.size() > 0);
  for (InputSplit s : splits) {
    CombineFileSplit cfs = (CombineFileSplit)s;
    for (Path p : cfs.getPaths()) {
      assertEquals(p.toUri().getScheme(), "file");
    }
  }
}
 
Example 60
Project: ditb   File: IntegrationTestBigLinkedList.java
public int runGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMuplitplier) throws Exception {
  LOG.info("Running Generator with numMappers=" + numMappers +", numNodes=" + numNodes);
  createSchema();
  Job job = Job.getInstance(getConf());

  job.setJobName("Link Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  FileInputFormat.setInputPaths(job, tmpOutput);
  job.setInputFormatClass(OneFilePerMapperSFIF.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMuplitplier);

  setMapperForGenerator(job);

  job.setOutputFormatClass(NullOutputFormat.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}