Java Code Examples for org.apache.hadoop.mapred.JobConf#setJarByClass()

The following examples show how to use org.apache.hadoop.mapred.JobConf#setJarByClass(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TeraGen.java    From hadoop-book with Apache License 2.0 6 votes vote down vote up
/**
 * Configures the TeraGen job as a mapper-only job (zero reduce tasks) and runs it.
 *
 * @param args the cli arguments: {@code args[0]} is parsed as a long and handed to
 *             {@code setNumberOfRows}; {@code args[1]} is used as the output path
 * @return 0 once {@code JobClient.runJob} has returned
 * @throws IOException as declared; not caught anywhere in this method
 */
public int run(String[] args) throws IOException {
  final JobConf conf = (JobConf) getConf();

  // Command-line driven settings, consumed in argument order.
  final long rowCount = Long.parseLong(args[0]);
  setNumberOfRows(conf, rowCount);
  final Path outputDir = new Path(args[1]);
  FileOutputFormat.setOutputPath(conf, outputDir);

  // Job identity.
  conf.setJobName("TeraGen");
  conf.setJarByClass(TeraGen.class);

  // Mapper only: no reduce tasks are scheduled.
  conf.setMapperClass(SortGenMapper.class);
  conf.setNumReduceTasks(0);

  // Record types and input/output formats.
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setInputFormat(RangeInputFormat.class);
  conf.setOutputFormat(TeraOutputFormat.class);

  JobClient.runJob(conf);
  return 0;
}
 
Example 2
Source File: ReadExistingDataJob.java    From tracing-framework with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Populates {@code job} so that it runs {@code ReadDataJob.TestMapper} over text
 * input located at {@code input_path}, with {@code NullOutputFormat} as the output format.
 *
 * @param job the job configuration to populate
 */
public void configure(JobConf job) {
    // The mapper class is also used to locate the jar to include with the job.
    job.setMapperClass(ReadDataJob.TestMapper.class);
    job.setJarByClass(ReadDataJob.TestMapper.class);

    // Speculative execution is switched off for both map and reduce tasks.
    job.setReduceSpeculativeExecution(false);
    job.setMapSpeculativeExecution(false);

    // Output goes to NullOutputFormat; input is read as text.
    job.setOutputFormat(NullOutputFormat.class);
    job.setInputFormat(TextInputFormat.class);

    // Register the input location last.
    final Path inputLocation = new Path(this.input_path);
    FileInputFormat.addInputPath(job, inputLocation);
}
 
Example 3
Source File: ReadDataJob.java    From tracing-framework with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Populates {@code job} so that it runs {@code TestMapper} over text input located
 * at {@code input_filename}, with {@code NullOutputFormat} as the output format.
 * No reducer class or output key/value types are set (those calls are commented out).
 *
 * @param job the job configuration to populate
 */
public void configure(JobConf job) {
    // The mapper class is also used to locate the jar to include with the job.
    job.setMapperClass(TestMapper.class);
    job.setJarByClass(TestMapper.class);

    // Reducer and explicit output types are intentionally left disabled:
    // job.setReducerClass(TestReducer.class);
    // job.setMapOutputKeyClass(IntWritable.class);
    // job.setMapOutputValueClass(NullWritable.class);
    // job.setOutputKeyClass(NullWritable.class);
    // job.setOutputValueClass(NullWritable.class);

    // Speculative execution is switched off for both map and reduce tasks.
    job.setReduceSpeculativeExecution(false);
    job.setMapSpeculativeExecution(false);

    // Output goes to NullOutputFormat; input is read as text.
    job.setOutputFormat(NullOutputFormat.class);
    job.setInputFormat(TextInputFormat.class);

    // Register the input location last.
    final Path inputLocation = new Path(this.input_filename);
    FileInputFormat.addInputPath(job, inputLocation);
}
 
Example 4
Source File: DataFsck.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the job configuration for a DataFsck run: a mapper-only job (zero reduce
 * tasks) with map speculative execution disabled, Text keys and values, and
 * sequence-file output.
 *
 * @return a new JobConf derived from {@code getConf()}
 */
private JobConf createJobConf() {
  final JobConf conf = new JobConf(getConf());

  // The job name is NAME followed by the current time as rendered by dateForm.
  final Date now = new Date();
  conf.setJobName(NAME + " " + dateForm.format(now));
  conf.setMapSpeculativeExecution(false);

  // Jar lookup and mapper-only execution.
  conf.setJarByClass(DataFsck.class);
  conf.setMapperClass(DataFsckMapper.class);
  conf.setNumReduceTasks(0);

  // Input/output formats and record types.
  conf.setInputFormat(DataFsckInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  return conf;
}
 
Example 5
Source File: TeraValidate.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs the TeraValidate job with exactly one reduce task and a
 * minimum split size of {@code Long.MAX_VALUE}.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 once {@code JobClient.runJob} has returned
 * @throws Exception as declared; not caught anywhere in this method
 */
public int run(String[] args) throws Exception {
  final JobConf conf = (JobConf) getConf();

  // Input and output locations from the command line.
  final Path inputDir = new Path(args[0]);
  TeraInputFormat.setInputPaths(conf, inputDir);
  final Path outputDir = new Path(args[1]);
  FileOutputFormat.setOutputPath(conf, outputDir);

  // Job identity.
  conf.setJobName("TeraValidate");
  conf.setJarByClass(TeraValidate.class);

  // Processing classes and record types.
  conf.setMapperClass(ValidateMapper.class);
  conf.setReducerClass(ValidateReducer.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setInputFormat(TeraInputFormat.class);

  // A single reducer, and a minimum split size large enough to force a single split.
  conf.setNumReduceTasks(1);
  conf.setLong("mapred.min.split.size", Long.MAX_VALUE);

  JobClient.runJob(conf);
  return 0;
}
 
Example 6
Source File: TeraGen.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the TeraGen job without any reduce tasks and submits it through
 * {@code JobClient.runJob}.
 *
 * @param args the cli arguments: {@code args[0]} is parsed as a long and passed to
 *             {@code setNumberOfRows}; {@code args[1]} names the output path
 * @return 0 after {@code JobClient.runJob} returns
 * @throws IOException as declared; this method does not catch it
 */
public int run(String[] args) throws IOException {
  final JobConf jobConf = (JobConf) getConf();

  // Arguments are consumed in order: row count first, then output directory.
  final long numRows = Long.parseLong(args[0]);
  setNumberOfRows(jobConf, numRows);
  final Path target = new Path(args[1]);
  FileOutputFormat.setOutputPath(jobConf, target);

  // Name and jar.
  jobConf.setJarByClass(TeraGen.class);
  jobConf.setJobName("TeraGen");

  // Formats and record types.
  jobConf.setInputFormat(RangeInputFormat.class);
  jobConf.setOutputFormat(TeraOutputFormat.class);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);

  // Mapper-only execution.
  jobConf.setNumReduceTasks(0);
  jobConf.setMapperClass(SortGenMapper.class);

  JobClient.runJob(jobConf);
  return 0;
}
 
Example 7
Source File: TeraValidate.java    From hadoop-book with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the TeraValidate job (one reduce task, minimum split size of
 * {@code Long.MAX_VALUE}) and submits it through {@code JobClient.runJob}.
 *
 * @param args {@code args[0]} names the input path, {@code args[1]} the output path
 * @return 0 after {@code JobClient.runJob} returns
 * @throws Exception as declared; this method does not catch it
 */
public int run(String[] args) throws Exception {
  final JobConf jobConf = (JobConf) getConf();

  // Locations, consumed in argument order.
  final Path source = new Path(args[0]);
  TeraInputFormat.setInputPaths(jobConf, source);
  final Path target = new Path(args[1]);
  FileOutputFormat.setOutputPath(jobConf, target);

  // Name and jar.
  jobConf.setJarByClass(TeraValidate.class);
  jobConf.setJobName("TeraValidate");

  // Record types, input format and processing classes.
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setInputFormat(TeraInputFormat.class);
  jobConf.setMapperClass(ValidateMapper.class);
  jobConf.setReducerClass(ValidateReducer.class);

  // Force a single split and a single reducer.
  jobConf.setLong("mapred.min.split.size", Long.MAX_VALUE);
  jobConf.setNumReduceTasks(1);

  JobClient.runJob(jobConf);
  return 0;
}
 
Example 8
Source File: Main.java    From hiped2 with Apache License 2.0 6 votes vote down vote up
/**
 * Entry point: configures a job using {@code Map} and {@code Reduce}, deletes any
 * existing output path recursively, and runs the job.
 *
 * @param args {@code args[0]} is the input path string, {@code args[1]} the output path
 * @throws Exception as declared; nothing is caught here
 */
public static void main(String... args) throws Exception {
    final JobConf conf = new JobConf();
    conf.setJarByClass(Main.class);

    final String inputPaths = args[0];
    final Path outputDir = new Path(args[1]);

    // Recursively remove the output path before configuring the rest of the job.
    outputDir.getFileSystem(conf).delete(outputDir, true);

    // Processing classes.
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    // Final output types, then the intermediate (map output) types.
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(TextTaggedMapOutput.class);

    FileInputFormat.setInputPaths(conf, inputPaths);
    FileOutputFormat.setOutputPath(conf, outputDir);

    JobClient.runJob(conf);
}
 
Example 9
Source File: TestMROldApiJobs.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a job built around {@code FailMapper} with a single allowed map attempt and
 * asserts that the job does not succeed.
 *
 * @param conf   job configuration to populate and run
 * @param inDir  input directory forwarded to {@code runJob}
 * @param outDir output directory forwarded to {@code runJob}
 */
public static void runJobFail(JobConf conf, Path inDir, Path outDir)
       throws IOException, InterruptedException {
  conf.setJobName("test-job-fail");
  conf.setMaxMapAttempts(1);

  // FailMapper supplies both the map logic and the jar location.
  conf.setMapperClass(FailMapper.class);
  conf.setJarByClass(FailMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  final boolean jobSucceeded = runJob(conf, inDir, outDir, 1, 0);
  Assert.assertFalse("Job expected to fail succeeded", jobSucceeded);
}
 
Example 10
Source File: TeraSort.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Configures and runs the TeraSort job. A partition file is written into the
 * qualified input directory and added to the distributed cache before the job
 * runs; the elapsed wall-clock time in seconds is printed to standard output.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 once {@code JobClient.runJob} has returned
 * @throws Exception as declared; not caught anywhere in this method
 */
public int run(String[] args) throws Exception {
  LOG.info("starting");
  final JobConf conf = (JobConf) getConf();

  // Qualify the input directory against its own file system; the partition
  // file is placed inside that directory.
  final Path rawInput = new Path(args[0]);
  final Path qualifiedInput = rawInput.makeQualified(rawInput.getFileSystem(conf));
  final Path partitionFile = new Path(qualifiedInput, TeraInputFormat.PARTITION_FILENAME);
  final String partitionUriText =
      partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME;
  final URI partitionUri = new URI(partitionUriText);

  TeraInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  conf.setJobName("TeraSort");
  conf.setJarByClass(TeraSort.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setInputFormat(TeraInputFormat.class);
  conf.setOutputFormat(TeraOutputFormat.class);
  conf.setPartitionerClass(TotalOrderPartitioner.class);

  // Write the partition file, then register it with the distributed cache.
  TeraInputFormat.writePartitionFile(conf, partitionFile);
  DistributedCache.addCacheFile(partitionUri, conf);
  DistributedCache.createSymlink(conf);

  // These are applied only after the partition file has been written.
  conf.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(conf, true);

  final long startedAt = System.currentTimeMillis();
  JobClient.runJob(conf);
  final long finishedAt = System.currentTimeMillis();
  final float elapsedSeconds = (float) (finishedAt - startedAt) / 1000;
  System.out.println(elapsedSeconds);

  LOG.info("done");
  return 0;
}
 
Example 11
Source File: YarnTestBinding.java    From mr4c with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a JobConf from the configuration of the cluster held in
 * {@code m_mrCluster}, starting that cluster first when the field is still null.
 *
 * @return a new JobConf whose jar is located via {@code AlgoRunner}
 */
public JobConf createTestMRJobConf() throws IOException {
	// Start the cluster on first use.
	final boolean clusterMissing = ( null == m_mrCluster );
	if ( clusterMissing ) {
		startMrCluster();
	}
	final JobConf conf = new JobConf(m_mrCluster.getConfig());
	conf.setJarByClass(AlgoRunner.class);
	return conf;
}
 
Example 12
Source File: MRv1TestBinding.java    From mr4c with Apache License 2.0 5 votes vote down vote up
/**
 * Obtains a JobConf from the cluster held in {@code m_mrCluster}, starting that
 * cluster first when the field is still null. The method is synchronized.
 *
 * @return the JobConf created by the cluster, with its jar located via {@code AlgoRunner}
 */
public synchronized JobConf createTestMRJobConf() throws IOException {
	// Start the cluster on first use.
	final boolean clusterMissing = ( null == m_mrCluster );
	if ( clusterMissing ) {
		startMRCluster();
	}
	final JobConf conf = m_mrCluster.createJobConf();
	conf.setJarByClass(AlgoRunner.class);
	return conf;
}
 
Example 13
Source File: CompositeUserJoin.java    From hadoop-map-reduce-patterns with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a mapper-only composite join of two inputs using CompositeInputFormat.
 *
 * @param args exactly four arguments: {@code args[0]} user data path,
 *             {@code args[1]} comment data path, {@code args[2]} output directory,
 *             {@code args[3]} join type passed to {@code CompositeInputFormat.compose}
 * @return 0 if the job reports success, 1 if it does not, and 2 when the
 *         argument count is wrong (after printing usage)
 * @throws Exception as declared; nothing is caught here
 */
@Override
public int run(String[] args) throws Exception {
	if (args.length != 4) {
		printUsage();
		// Stop here: continuing would index past the end of args (or silently
		// ignore extra arguments) instead of reporting the usage error.
		return 2;
	}
	Path userPath = new Path(args[0]);
	Path commentPath = new Path(args[1]);
	Path outputDir = new Path(args[2]);
	String joinType = args[3];
	JobConf conf = new JobConf("CompositeJoin");
	conf.setJarByClass(CompositeUserJoin.class);
	conf.setMapperClass(CompositeMapper.class);
	conf.setNumReduceTasks(0);
	// Set the input format class to a CompositeInputFormat class.
	// The CompositeInputFormat will parse all of our input files and output
	// records to our mapper.
	conf.setInputFormat(CompositeInputFormat.class);
	// The composite input format join expression will set how the records
	// are going to be read in, and in what input format.
	conf.set("mapred.join.expr", CompositeInputFormat.compose(joinType,
			KeyValueTextInputFormat.class, userPath, commentPath));
	TextOutputFormat.setOutputPath(conf, outputDir);
	conf.setOutputKeyClass(Text.class);
	conf.setOutputValueClass(Text.class);
	RunningJob job = JobClient.runJob(conf);
	// Poll once a second until the job reports completion.
	while (!job.isComplete()) {
		Thread.sleep(1000);
	}
	return job.isSuccessful() ? 0 : 1;
}
 
Example 14
Source File: RandomWriter.java    From hadoop-gpu with Apache License 2.0 4 votes vote down vote up
/**
 * Launches the distributed random-write job as a mapper-only job (reduce tasks
 * are set to zero). Defaults visible here: 10 maps per host and
 * 1024*1024*1024 bytes per map; the default total is maps-per-host times
 * bytes-per-map times the cluster's task tracker count.
 *
 * @param args {@code args[0]} is the output directory
 * @return -1 when no arguments are given (after printing usage), -2 when the
 *         bytes-per-map setting is 0, otherwise 0 once the job call has returned
 * @throws Exception as declared; nothing is caught here
 */
public int run(String[] args) throws Exception {
  if (args.length < 1) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  final Path outputDir = new Path(args[0]);
  final JobConf conf = new JobConf(getConf());

  conf.setJarByClass(RandomWriter.class);
  conf.setJobName("random-writer");
  FileOutputFormat.setOutputPath(conf, outputDir);

  conf.setOutputKeyClass(BytesWritable.class);
  conf.setOutputValueClass(BytesWritable.class);

  conf.setInputFormat(RandomInputFormat.class);
  conf.setMapperClass(Map.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);

  // Sizing: the cluster status supplies the task tracker count for the default total.
  final ClusterStatus clusterStatus = new JobClient(conf).getClusterStatus();
  final int mapsPerHost = conf.getInt("test.randomwriter.maps_per_host", 10);
  final long bytesPerMap =
      conf.getLong("test.randomwrite.bytes_per_map", 1024L * 1024 * 1024);
  if (bytesPerMap == 0) {
    System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
    return -2;
  }
  final long defaultTotalBytes =
      mapsPerHost * bytesPerMap * clusterStatus.getTaskTrackers();
  final long totalBytes =
      conf.getLong("test.randomwrite.total_bytes", defaultTotalBytes);

  // When the total is smaller than one map's share, use one map that writes it all.
  int mapCount = (int) (totalBytes / bytesPerMap);
  if (totalBytes > 0 && mapCount == 0) {
    mapCount = 1;
    conf.setLong("test.randomwrite.bytes_per_map", totalBytes);
  }

  conf.setNumMapTasks(mapCount);
  System.out.println("Running " + mapCount + " maps.");

  // No reduce phase.
  conf.setNumReduceTasks(0);

  final Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(conf);
  final Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  final long elapsedSeconds = (endTime.getTime() - startTime.getTime()) / 1000;
  System.out.println("The job took " + elapsedSeconds + " seconds.");

  return 0;
}
 
Example 15
Source File: RandomTextWriter.java    From hadoop-gpu with Apache License 2.0 4 votes vote down vote up
/**
 * Launches the distributed random text write job as a mapper-only job (reduce
 * tasks are set to zero). Defaults visible here: 10 maps per host and
 * 1024*1024*1024 bytes per map; the default total is maps-per-host times
 * bytes-per-map times the cluster's task tracker count.
 *
 * <p>Arguments are validated before the cluster is contacted: a trailing
 * {@code -outFormat} with no class name, or a command line with no output
 * directory, prints an error plus usage instead of throwing.
 *
 * @param args the output directory (first non-option argument) and optionally
 *             {@code -outFormat <class>} naming an OutputFormat subclass
 * @return the value of {@code printUsage()} on a usage error, -2 when the
 *         bytes-per-map setting is 0, otherwise 0 once the job call has returned
 * @throws Exception as declared, including class-loading failures for the
 *         {@code -outFormat} class
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    return printUsage();    
  }

  // Parse the command line first so usage errors are reported without
  // creating a JobClient.
  Class<? extends OutputFormat> outputFormatClass = 
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    if ("-outFormat".equals(args[i])) {
      if (i + 1 >= args.length) {
        // Explicit bounds check instead of catching ArrayIndexOutOfBoundsException.
        System.out.println("ERROR: Required parameter missing from " +
            args[i]);
        return printUsage(); // exits
      }
      outputFormatClass = 
        Class.forName(args[++i]).asSubclass(OutputFormat.class);
    } else {
      otherArgs.add(args[i]);
    }
  }
  if (otherArgs.isEmpty()) {
    // Only options were supplied; there is no output directory to write to.
    System.out.println("ERROR: Missing output directory argument");
    return printUsage();
  }
  
  JobConf job = new JobConf(getConf());
  
  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  
  job.setInputFormat(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(Map.class);        
  
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = job.getInt("test.randomtextwrite.maps_per_host", 10);
  long numBytesToWritePerMap = job.getLong("test.randomtextwrite.bytes_per_map",
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomtextwrite.bytes_per_map set to 0");
    return -2;
  }
  long totalBytesToWrite = job.getLong("test.randomtextwrite.total_bytes", 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  // When the total is smaller than one map's share, use one map that writes it all.
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomtextwrite.bytes_per_map", totalBytesToWrite);
  }

  job.setOutputFormat(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
  
  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return 0;
}
 
Example 16
Source File: TestTableSnapshotInputFormat.java    From hbase with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a table and a snapshot of it, runs a MapReduce job over the snapshot
 * with a single reduce task and {@code NullOutputFormat}, and asserts that the
 * job was successful.
 *
 * <p>When {@code shutdownCluster} is true the HBase cluster is shut down after
 * the snapshot is created and no cleanup is attempted; otherwise the snapshot
 * and table are deleted afterwards. {@code expectedNumSplits} is not read by
 * this method.
 *
 * @param numSplitsPerRegion when greater than 1, the snapshot map job is
 *        initialised with a {@code RegionSplitter.UniformSplit} and this count
 */
public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
    String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
    int numSplitsPerRegion,int expectedNumSplits, boolean shutdownCluster) throws Exception {

  // Set up the table and its snapshot before (optionally) stopping the cluster.
  createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);
  if (shutdownCluster) {
    util.shutdownMiniHBaseCluster();
  }

  try {
    final JobConf conf = new JobConf(util.getConfiguration());

    conf.setJarByClass(util.getClass());
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(conf,
      TestTableSnapshotInputFormat.class);

    // Choose the initialiser overload depending on whether regions are split further.
    final boolean splitRegions = numSplitsPerRegion > 1;
    if (!splitRegions) {
      TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
              TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
              NullWritable.class, conf, true, tableDir);
    } else {
      TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
              TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
              NullWritable.class, conf, true, tableDir, new RegionSplitter.UniformSplit(),
              numSplitsPerRegion);
    }

    conf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
    conf.setNumReduceTasks(1);
    conf.setOutputFormat(NullOutputFormat.class);

    final RunningJob finishedJob = JobClient.runJob(conf);
    Assert.assertTrue(finishedJob.isSuccessful());
  } finally {
    // Cleanup is only attempted when the cluster was left running.
    if (!shutdownCluster) {
      util.getAdmin().deleteSnapshot(snapshotName);
      util.deleteTable(tableName);
    }
  }
}
 
Example 17
Source File: RandomWriter.java    From hadoop-book with Apache License 2.0 4 votes vote down vote up
/**
 * Main routine for the distributed random-write job. The job is mapper-only
 * (reduce tasks are set to zero). Unless overridden in the configuration it
 * uses 10 maps per host and 1024*1024*1024 bytes per map, with the total
 * defaulting to maps-per-host times bytes-per-map times the number of task
 * trackers reported by the cluster.
 *
 * @param args {@code args[0]} names the output directory
 * @return -1 if no arguments were supplied (usage is printed), -2 if the
 *         bytes-per-map setting is 0, and 0 after the job call returns
 * @throws Exception as declared; this method catches nothing
 */
public int run(String[] args) throws Exception {
    final boolean noArguments = (args.length == 0);
    if (noArguments) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    final Path target = new Path(args[0]);
    final JobConf jobConf = new JobConf(getConf());

    jobConf.setJarByClass(RandomWriter.class);
    jobConf.setJobName("random-writer");
    FileOutputFormat.setOutputPath(jobConf, target);

    jobConf.setOutputKeyClass(BytesWritable.class);
    jobConf.setOutputValueClass(BytesWritable.class);

    jobConf.setInputFormat(RandomInputFormat.class);
    jobConf.setMapperClass(Map.class);
    jobConf.setReducerClass(IdentityReducer.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    // The task tracker count from the cluster status feeds the default total size.
    final JobClient jobClient = new JobClient(jobConf);
    final ClusterStatus status = jobClient.getClusterStatus();
    final int perHostMaps = jobConf.getInt("test.randomwriter.maps_per_host", 10);
    final long perMapBytes = jobConf.getLong("test.randomwrite.bytes_per_map",
            1024 * 1024 * 1024);
    if (perMapBytes == 0) {
        System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
        return -2;
    }
    final long fallbackTotal = perHostMaps * perMapBytes * status.getTaskTrackers();
    final long requestedTotal = jobConf.getLong("test.randomwrite.total_bytes",
            fallbackTotal);

    // A total below one map's share is written by a single map instead of none.
    int mapTasks = (int) (requestedTotal / perMapBytes);
    if (mapTasks == 0 && requestedTotal > 0) {
        mapTasks = 1;
        jobConf.setLong("test.randomwrite.bytes_per_map", requestedTotal);
    }

    jobConf.setNumMapTasks(mapTasks);
    System.out.println("Running " + mapTasks + " maps.");

    // No reduce tasks.
    jobConf.setNumReduceTasks(0);

    final Date begun = new Date();
    System.out.println("Job started: " + begun);
    JobClient.runJob(jobConf);
    final Date finished = new Date();
    System.out.println("Job ended: " + finished);
    final long seconds = (finished.getTime() - begun.getTime()) / 1000;
    System.out.println("The job took " + seconds + " seconds.");

    return 0;
}
 
Example 18
Source File: OutputToText.java    From wikireverse with MIT License 4 votes vote down vote up
/**
 * Runs the job that converts sequence-file input into gzip-compressed text
 * output using OutputToTextMapper and IdentityReducer.
 *
 * @param args {@code args[0]} is a comma-separated list of input paths,
 *             {@code args[1]} is the output path; a missing argument is treated
 *             like an empty one
 * @return 0 if the job reports success; 1 if an input or output path is
 *         missing or the job does not report success
 * @throws Exception as declared; nothing is caught here
 */
public int run(String[] args) throws Exception {
	// Get current configuration.
	Configuration conf = getConf();

	// Parse command line arguments. A missing argument falls through to the
	// same "not found" handling as an empty one instead of throwing.
	String inputPaths = args.length > 0 ? args[0] : "";
	String outputPath = args.length > 1 ? args[1] : "";

	JobConf job = new JobConf(conf);

	// Set input path.
	if (inputPaths.length() > 0) {
		List<String> segmentPaths = Lists.newArrayList(Splitter.on(",")
				.split(inputPaths));

		for (String segmentPath : segmentPaths) {
			LOG.info("Adding input path " + segmentPath);
			FileInputFormat.addInputPath(job, new Path(segmentPath));
		}
	} else {
		System.err.println("No input path found.");
		return 1;
	}

	// Set output path.
	if (outputPath.length() > 0) {
		LOG.info("Setting output path to " + outputPath);
		TextOutputFormat.setOutputPath(job, new Path(outputPath));
		// Compress output to boost performance.
		TextOutputFormat.setCompressOutput(job, true);
		// Must be the setter: the getter only reads the configured codec and
		// would leave GzipCodec unconfigured.
		TextOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
	} else {
		System.err.println("No output path found.");
		return 1;
	}

	// Load other classes from same jar as this class.
	job.setJarByClass(OutputToText.class);

	// Input is Hadoop sequence file format.
	job.setInputFormat(SequenceFileInputFormat.class);

	// Output is text format for import into database later.
	job.setOutputFormat(TextOutputFormat.class);

	// Set the output data types.
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Text.class);

	// Use custom mapper class.
	job.setMapperClass(OutputToTextMapper.class);

	// Use standard reducer class.
	job.setReducerClass(IdentityReducer.class);

	return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
 
Example 19
Source File: SegmentCombiner.java    From wikireverse with MIT License 4 votes vote down vote up
/**
 * Runs the job that combines sequence-file segments into gzip-compressed
 * sequence-file output using SegmentCombinerMapper and LinkArrayReducer.
 *
 * @param args {@code args[0]} is a comma-separated list of input paths,
 *             {@code args[1]} is the output path; a missing argument is treated
 *             like an empty one
 * @return 0 if the job reports success; 1 if an input or output path is
 *         missing or the job does not report success
 * @throws Exception as declared; nothing is caught here
 */
public int run(String[] args) throws Exception {
	// Get current configuration.
	Configuration conf = getConf();

	// Parse command line arguments. A missing argument falls through to the
	// same "not found" handling as an empty one instead of throwing.
	String inputPaths = args.length > 0 ? args[0] : "";
	String outputPath = args.length > 1 ? args[1] : "";

	JobConf job = new JobConf(conf);

	// Set input path.
	if (inputPaths.length() > 0) {
		List<String> segmentPaths = Lists.newArrayList(Splitter.on(",")
				.split(inputPaths));

		for (String segmentPath : segmentPaths) {
			LOG.info("Adding input path " + segmentPath);
			FileInputFormat.addInputPath(job, new Path(segmentPath));
		}
	} else {
		System.err.println("No input path found.");
		return 1;
	}

	// Set output path.
	if (outputPath.length() > 0) {
		LOG.info("Setting output path to " + outputPath);
		SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
		// Compress output to boost performance.
		SequenceFileOutputFormat.setCompressOutput(job, true);
		// Must be the setter: the getter only reads the configured codec and
		// would leave GzipCodec unconfigured.
		SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
	} else {
		System.err.println("No output path found.");
		return 1;
	}

	// Load other classes from same jar as this class.
	job.setJarByClass(SegmentCombiner.class);

	// Input is Hadoop sequence file format.
	job.setInputFormat(SequenceFileInputFormat.class);

	// Output is Hadoop sequence file format.
	job.setOutputFormat(SequenceFileOutputFormat.class);

	// Set the output data types.
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(LinkArrayWritable.class);

	// Use custom mapper class.
	job.setMapperClass(SegmentCombinerMapper.class);

	// Use custom reducer class.
	job.setReducerClass(LinkArrayReducer.class);

	return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
 
Example 20
Source File: GenericMRLoadJobCreator.java    From RDFS with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a JobConf that uses SampleMapper and SampleReducer, with the rest of
 * the configuration driven by {@code parseArgs}.
 *
 * <p>After argument parsing: if no output path is configured, the output format
 * is set to NullOutputFormat. If no input paths are configured,
 * {@code confRandom} is applied. Otherwise, if a class is configured under
 * {@code mapred.indirect.input.format}, a sequence file listing every file found
 * under the input paths (length as key, URI string as value) is written into the
 * job client's system directory and its location is stored under
 * {@code mapred.indirect.input.file}.
 *
 * @param argv command-line arguments handed to {@code parseArgs}
 * @param mapoutputCompressed value passed to {@code setCompressMapOutput}
 * @param outputCompressed value stored under {@code mapred.output.compress}
 * @return the configured job, or {@code null} when {@code parseArgs} returns false
 * @throws Exception as declared; nothing is caught here
 */
public static JobConf createJob(String[] argv, boolean mapoutputCompressed,
    boolean outputCompressed) throws Exception {

  JobConf job = new JobConf();
  // Note: the jar is located via GenericMRLoadGenerator.
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return null;
  }

  if (null == FileOutputFormat.getOutputPath(job)) {
    // No output dir? No writes
    job.setOutputFormat(NullOutputFormat.class);
  }

  if (0 == FileInputFormat.getInputPaths(job).length) {
    // No input dir? Generate random data
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (null != job.getClass("mapred.indirect.input.format", null)) {
    // specified IndirectInputFormat? Build src list
    JobClient jClient = new JobClient(job);
    Path sysdir = jClient.getSystemDir();
    Random r = new Random();
    // File name: a random non-negative int rendered in base 36, plus "_files".
    Path indirInputFile = new Path(sysdir, Integer.toString(r
        .nextInt(Integer.MAX_VALUE), 36)
        + "_files");
    job.set("mapred.indirect.input.file", indirInputFile.toString());
    SequenceFile.Writer writer = SequenceFile.createWriter(sysdir
        .getFileSystem(job), job, indirInputFile, LongWritable.class,
        Text.class, SequenceFile.CompressionType.NONE);
    try {
      for (Path p : FileInputFormat.getInputPaths(job)) {
        FileSystem fs = p.getFileSystem(job);
        // Walk the tree under p with an explicit stack of directories to list.
        Stack<Path> pathstack = new Stack<Path>();
        pathstack.push(p);
        while (!pathstack.empty()) {
          for (FileStatus stat : fs.listStatus(pathstack.pop())) {
            if (stat.isDir()) {
              // Directories whose name starts with "_" are not descended into.
              if (!stat.getPath().getName().startsWith("_")) {
                pathstack.push(stat.getPath());
              }
            } else {
              // One record per file: (length, URI string), with a sync before each.
              writer.sync();
              writer.append(new LongWritable(stat.getLen()), new Text(stat
                  .getPath().toUri().toString()));
            }
          }
        }
      }
    } finally {
      // Close the writer even if listing or appending fails.
      writer.close();
    }
  }

  job.setCompressMapOutput(mapoutputCompressed);
  job.setBoolean("mapred.output.compress", outputCompressed);
  return job;

}