Java Code Examples for org.apache.hadoop.mapred.JobClient

The following examples show how to use org.apache.hadoop.mapred.JobClient. They are extracted from open source projects; each example lists its source project, source file, and license.
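As a quick orientation before the extracted examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the two common ways JobClient is used with the old mapred API: the blocking JobClient.runJob(JobConf) helper, and an explicit JobClient instance with submitJob() followed by monitoring. The class name, paths, and identity mapper/reducer are placeholders chosen for illustration.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class JobClientSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(JobClientSketch.class);
    job.setJobName("jobclient-sketch");

    // Placeholder classes and paths; real jobs plug in their own mapper/reducer.
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(IdentityReducer.class);
    // With the default TextInputFormat, the identity map/reduce emits <LongWritable, Text>.
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Pattern 1: blocking convenience call, used by most of the examples below.
    // JobClient.runJob(job);

    // Pattern 2: explicit client -- submit, wait, then inspect the result.
    JobClient client = new JobClient(job);
    try {
      RunningJob running = client.submitJob(job);
      running.waitForCompletion();
      System.out.println("Job " + running.getID() + " successful: " + running.isSuccessful());
    } finally {
      client.close();
    }
  }
}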
Example 1
Source Project: hadoop   Source File: NNBench.java    License: Apache License 2.0
/**
 * Run the test
 * 
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);
  
  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
 
Example 2
Source Project: hadoop-gpu   Source File: TestDatamerge.java    License: Apache License 2.0
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapred.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
 
Example 3
Source Project: hadoop   Source File: TestDFSIO.java    License: Apache License 2.0
private void runIOTest(
        Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass, 
        Path outputDir) throws IOException {
  JobConf job = new JobConf(config, TestDFSIO.class);

  FileInputFormat.setInputPaths(job, getControlDir(config));
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);

  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Example 4
Source Project: hadoop   Source File: TestDatamerge.java    License: Apache License 2.0
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
 
Example 5
Source Project: RDFS   Source File: DFSGeneralTest.java    License: Apache License 2.0
private void verifyFiles(FileSystem fs) 
    throws IOException {
  Path inputPath = new Path(input, "filelists");
  Path outputPath = new Path(dfs_output, "verify_results");
  if (!fs.exists(inputPath)) {
    System.out.println("Couldn't find " + inputPath + " Skip verification.");
    return;
  }
  System.out.println("-------------------");
  System.out.println("VERIFY FILES");
  System.out.println("-------------------");
  JobConf conf = new JobConf(fsConfig, DFSGeneralTest.class);
  conf.set(THREAD_CLASS_KEY, "org.apache.hadoop.mapred.GenReaderThread");
  testtype = GenReaderThread.TEST_TYPE;
  conf.set(TEST_TYPE_KEY, testtype);
  conf.setMapperClass(GenMapper.class);
  conf.setReducerClass(GenReduce.class);
  conf.setJobName(getUniqueName("gentest-verify-" + testtype));
  output = getUniqueName(OUTPUT + testtype);
  updateJobConf(conf, inputPath, outputPath);
  long startTime = System.currentTimeMillis();
  JobClient.runJob(conf);
  long endTime = System.currentTimeMillis();
  printResult(fs, new Path(output, "results"), startTime, endTime);
}
 
Example 6
private void cancelMRJobTrackerToken(
    final Token<? extends TokenIdentifier> t, String userToProxy)
    throws HadoopSecurityManagerException {
  try {
    getProxiedUser(userToProxy).doAs(new PrivilegedExceptionAction<Void>() {
      @SuppressWarnings("unchecked")
      @Override
      public Void run() throws Exception {
        cancelToken((Token<DelegationTokenIdentifier>) t);
        return null;
      }

      private void cancelToken(Token<DelegationTokenIdentifier> jt)
          throws IOException, InterruptedException {
        JobConf jc = new JobConf(conf);
        JobClient jobClient = new JobClient(jc);
        jobClient.cancelDelegationToken(jt);
      }
    });
  } catch (Exception e) {
    e.printStackTrace();
    throw new HadoopSecurityManagerException("Failed to cancel Token. "
        + e.getMessage() + e.getCause());
  }
}
 
Example 7
Source Project: RDFS   Source File: TeraValidate.java    License: Apache License 2.0
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split 
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
 
Example 8
Source Project: hiped2   Source File: Main.java    License: Apache License 2.0
public static void main(String... args) throws Exception {

    JobConf job = new JobConf();
    job.setJarByClass(Main.class);

    String input = args[0];
    Path output = new Path(args[1]);

    output.getFileSystem(job).delete(output, true);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(TextTaggedMapOutput.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    JobClient.runJob(job);
  }
 
Example 9
Source Project: RDFS   Source File: TestNoJobSetupCleanup.java    License: Apache License 2.0
private Job submitAndValidateJob(JobConf conf, int numMaps, int numReds)
    throws IOException, InterruptedException, ClassNotFoundException {
  conf.setJobSetupCleanupNeeded(false);
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir,
              numMaps, numReds);

  job.setOutputFormatClass(MyOutputFormat.class);
  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());
  JobID jobid = (org.apache.hadoop.mapred.JobID)job.getID();

  JobClient jc = new JobClient(conf);
  assertTrue(jc.getSetupTaskReports(jobid).length == 0);
  assertTrue(jc.getCleanupTaskReports(jobid).length == 0);
  assertTrue(jc.getMapTaskReports(jobid).length == numMaps);
  assertTrue(jc.getReduceTaskReports(jobid).length == numReds);
  FileSystem fs = FileSystem.get(conf);
  assertTrue("Job output directory doesn't exit!", fs.exists(outDir));
  FileStatus[] list = fs.listStatus(outDir, new OutputFilter());
  int numPartFiles = numReds == 0 ? numMaps : numReds;
  assertTrue("Number of part-files is " + list.length + " and not "
      + numPartFiles, list.length == numPartFiles);
  return job;
}
 
Example 10
Source Project: spork   Source File: MapReduceLauncher.java    License: Apache License 2.0
@Override
public void killJob(String jobID, Configuration conf) throws BackendException {
    try {
        if (conf != null) {
            JobConf jobConf = new JobConf(conf);
            JobClient jc = new JobClient(jobConf);
            JobID id = JobID.forName(jobID);
            RunningJob job = jc.getJob(id);
            if (job == null)
                System.out.println("Job with id " + jobID + " is not active");
            else
            {
                job.killJob();
                log.info("Kill " + id + " submitted.");
            }
        }
    } catch (IOException e) {
        throw new BackendException(e);
    }
}
 
Example 11
Source Project: big-c   Source File: DataJoinJob.java    License: Apache License 2.0
/**
 * Submit/run a map/reduce job.
 * 
 * @param job
 * @return true for success
 * @throws IOException
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean success = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
      }
      running = jc.getJob(jobId);
    }
    success = running.isSuccessful();
  } finally {
    if (!sucess && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return success;
}
 
Example 12
Source Project: attic-apex-malhar   Source File: LineIndexer.java    License: Apache License 2.0
/**
 * The actual main() method for our program; this is the
 * "driver" for the MapReduce job.
 */
public static void main(String[] args)
{
  JobClient client = new JobClient();
  JobConf conf = new JobConf(LineIndexer.class);

  conf.setJobName("LineIndexer");

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.addInputPath(conf, new Path("input"));
  FileOutputFormat.setOutputPath(conf, new Path("output"));

  conf.setMapperClass(LineIndexMapper.class);
  conf.setReducerClass(LineIndexReducer.class);

  client.setConf(conf);

  try {
    JobClient.runJob(conf);
  } catch (Exception e) {
    e.printStackTrace();
  }
}
 
Example 13
Source Project: spork   Source File: SimplePigStats.java    License: Apache License 2.0
void initialize(PigContext pigContext, JobClient jobClient,
        JobControlCompiler jcc, MROperPlan mrPlan) {
    super.start();

    if (pigContext == null || jobClient == null || jcc == null) {
        LOG.warn("invalid params: " + pigContext + jobClient + jcc);
        return;
    }

    this.pigContext = pigContext;
    this.jobClient = jobClient;
    this.jcc = jcc;

    // build job DAG with job ids assigned to null
    try {
        new JobGraphBuilder(mrPlan).visit();
    } catch (VisitorException e) {
        LOG.warn("unable to build job plan", e);
    }
}
 
Example 14
Source Project: anthelion   Source File: SolrClean.java    License: Apache License 2.0
public void delete(String crawldb, String solrUrl, boolean noCommit) throws IOException {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("SolrClean: starting at " + sdf.format(start));

  JobConf job = new NutchJob(getConf());

  FileInputFormat.addInputPath(job, new Path(crawldb, CrawlDb.CURRENT_NAME));
  job.setBoolean("noCommit", noCommit);
  job.set(SolrConstants.SERVER_URL, solrUrl);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapOutputKeyClass(ByteWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setMapperClass(DBFilter.class);
  job.setReducerClass(SolrDeleter.class);

  JobClient.runJob(job);

  long end = System.currentTimeMillis();
  LOG.info("SolrClean: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
}
 
Example 15
Source Project: big-c   Source File: TestDatamerge.java    License: Apache License 2.0
public void testEmptyJoin() throws Exception {
  JobConf job = new JobConf();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  Path[] src = { new Path(base,"i0"), new Path("i1"), new Path("i2") };
  job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer",
      Fake_IF.class, src));
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(IdentityMapper.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputKeyClass(IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);

  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
 
Example 16
Source Project: RDFS   Source File: NNBench.java    License: Apache License 2.0
/**
 * Run the test
 * 
 * @throws IOException on error
 */
public static void runTests(Configuration config) throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);
  
  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
 
Example 17
Source Project: hbase   Source File: TestTableMapReduceUtil.java    License: Apache License 2.0
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(1);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
        jobConf);
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
        ClassificatorRowReduce.class, jobConf);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null)
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
  }
}
 
Example 18
public Map<String, Object> getJobDetails(JobClient jobClient, String jobId)
		throws AnkushException {
	String errMsg = "Unable to getch Hadoop jobs details, could not connect to Hadoop JobClient.";
	try {
		if (jobClient != null) {
			// Get the jobs that are submitted.
			JobStatus[] jobStatus = jobClient.getAllJobs();
			for (JobStatus jobSts : jobStatus) {

			}
		}
	} catch (Exception e) {
		HadoopUtils.addAndLogError(this.LOG, this.clusterConfig, errMsg,
				Constant.Component.Name.HADOOP, e);
		throw new AnkushException(errMsg);
	}
	return null;
}
 
Example 19
Source Project: big-c   Source File: NNBench.java    License: Apache License 2.0
/**
 * Run the test
 * 
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);
  
  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
 
Example 20
Source Project: hadoop-gpu   Source File: TeraValidate.java    License: Apache License 2.0
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split 
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
 
Example 21
Source Project: RDFS   Source File: TestDatamerge.java    License: Apache License 2.0
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapred.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
 
Example 22
Source Project: aerospike-hadoop   Source File: WordCountInput.java    License: Apache License 2.0
public int run(final String[] args) throws Exception {

        log.info("run starting");

        final Configuration conf = getConf();

        JobConf job = new JobConf(conf, WordCountInput.class);
        job.setJobName("AerospikeWordCountInput");

        job.setInputFormat(AerospikeInputFormat.class);
        job.setMapperClass(Map.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setOutputFormat(TextOutputFormat.class);

        FileOutputFormat.setOutputPath(job, new Path(args[0]));

        JobClient.runJob(job);

        log.info("finished");
        return 0;
    }
 
Example 23
Source Project: emr-dynamodb-connector   Source File: ReadIopsCalculator.java    License: Apache License 2.0
public ReadIopsCalculator(JobClient jobClient, DynamoDBClient dynamoDBClient, String tableName,
    int totalSegments, int localSegments) {
  this.jobConf = (JobConf) jobClient.getConf();
  this.jobClient = jobClient;

  this.dynamoDBClient = dynamoDBClient;
  this.tableName = tableName;
  this.totalSegments = totalSegments;
  this.localSegments = localSegments;

  this.throughputPercent = Double.parseDouble(jobConf.get(DynamoDBConstants
      .THROUGHPUT_READ_PERCENT, DynamoDBConstants.DEFAULT_THROUGHPUT_PERCENTAGE));

  log.info("Table name: " + tableName);
  log.info("Throughput percent: " + throughputPercent);
}
 
Example 24
Source Project: hadoop-gpu   Source File: DataJoinJob.java    License: Apache License 2.0
/**
 * Submit/run a map/reduce job.
 * 
 * @param job
 * @return true for success
 * @throws IOException
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean success = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
      }
      running = jc.getJob(jobId);
    }
    success = running.isSuccessful();
  } finally {
    if (!sucess && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return success;
}
 
Example 25
Source Project: systemds   Source File: InfrastructureAnalyzer.java    License: Apache License 2.0
/**
 * Analyzes properties of hadoop cluster and configuration.
 */
private static void analyzeHadoopCluster() {
	try {
		JobConf job = ConfigurationManager.getCachedJobConf();
		JobClient client = new JobClient(job);
		ClusterStatus stat = client.getClusterStatus();
		if( stat != null ) { //if in cluster mode
			//analyze cluster status
			_remotePar = stat.getTaskTrackers();
			_remoteParMap = stat.getMaxMapTasks(); 
			_remoteParReduce = stat.getMaxReduceTasks(); 
			
			//analyze pure configuration properties
			analyzeHadoopConfiguration();
		}
	} 
	catch (IOException e) {
		throw new RuntimeException("Unable to analyze infrastructure.",e);
	}
}
 
Example 26
Source Project: RDFS   Source File: TeraSort.java    License: Apache License 2.0
public int run(String[] args) throws Exception {
  LOG.info("starting");
  JobConf job = (JobConf) getConf();
  Path inputDir = new Path(args[0]);
  inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
  Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
  URI partitionUri = new URI(partitionFile.toString() +
                             "#" + TeraInputFormat.PARTITION_FILENAME);
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(TeraInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TeraInputFormat.writePartitionFile(job, partitionFile);
  DistributedCache.addCacheFile(partitionUri, job);
  DistributedCache.createSymlink(job);
  job.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(job, true);
  long startTime = System.currentTimeMillis();
  JobClient.runJob(job);
  long endTime = System.currentTimeMillis();
  System.out.println((float)(endTime-startTime)/1000);
  LOG.info("done");
  return 0;
}
 
Example 27
Source Project: RDFS   Source File: AggregateWordCount.java    License: Apache License 2.0
/**
 * The main driver for word count map/reduce program. Invoke this method to
 * submit the map/reduce job.
 * 
 * @throws IOException
 *           When there is communication problems with the job tracker.
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException {
  JobConf conf = ValueAggregatorJob.createValueAggregatorJob(args
      , new Class[] {WordCountPlugInClass.class});
 
  JobClient.runJob(conf);
}
 
Example 28
Source Project: hadoop   Source File: TestEncryptedShuffle.java    License: Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts)
  throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
      KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
                                    useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
      new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();

    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
 
Example 29
Source Project: hadoop   Source File: TestMROldApiJobs.java    License: Apache License 2.0
static boolean runJob(JobConf conf, Path inDir, Path outDir, int numMaps, 
                         int numReds) throws IOException, InterruptedException {

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }
  String input = "The quick brown fox\n" + "has many silly\n"
      + "red fox sox\n";
  for (int i = 0; i < numMaps; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }

  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf, fs);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  JobClient jobClient = new JobClient(conf);
  
  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
 
Example 30
Source Project: hbase-indexer   Source File: BatchStateUpdater.java    License: Apache License 2.0
protected BatchStateUpdater(String indexerName, WriteableIndexerModel indexerModel, JobClient jobClient,
                  ScheduledExecutorService executor, int pollInterval) {
    this.indexerModel = indexerModel;
    this.jobClient = jobClient;
    this.executor = executor;
    this.pollInterval = pollInterval;
    this.indexerName = indexerName;
}