org.apache.hadoop.mapred.JobClient Java Examples
The following examples show how to use org.apache.hadoop.mapred.JobClient.
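Before the project-specific examples, here is a minimal sketch of the two JobClient patterns that recur below: blocking submission through the static JobClient.runJob(JobConf) call, and constructing a JobClient instance to query cluster state. The class name, job name, and the "input"/"output" paths are placeholders chosen for illustration only; they do not come from any of the projects listed here.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class JobClientSketch {

  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(JobClientSketch.class);
    job.setJobName("jobclient-sketch");

    // Placeholder paths and identity mapper/reducer, purely for illustration.
    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(IdentityReducer.class);
    // Default TextInputFormat emits LongWritable keys and Text values,
    // which the identity mapper/reducer pass through unchanged.
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // Pattern 1: submit and block until the job finishes.
    RunningJob running = JobClient.runJob(job);
    System.out.println("Job succeeded: " + running.isSuccessful());

    // Pattern 2: open a JobClient to inspect the cluster.
    JobClient client = new JobClient(job);
    try {
      ClusterStatus status = client.getClusterStatus();
      System.out.println("Task trackers: " + status.getTaskTrackers()
          + ", map slots: " + status.getMaxMapTasks()
          + ", reduce slots: " + status.getMaxReduceTasks());
    } finally {
      client.close();
    }
  }
}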
Example #1
Source File: NNBench.java From hadoop with Apache License 2.0 | 8 votes |
/**
 * Run the test
 *
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);

  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);

  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);

  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
Example #2
Source File: SolrClean.java From anthelion with Apache License 2.0 | 7 votes |
public void delete(String crawldb, String solrUrl, boolean noCommit) throws IOException {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("SolrClean: starting at " + sdf.format(start));

  JobConf job = new NutchJob(getConf());

  FileInputFormat.addInputPath(job, new Path(crawldb, CrawlDb.CURRENT_NAME));
  job.setBoolean("noCommit", noCommit);
  job.set(SolrConstants.SERVER_URL, solrUrl);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapOutputKeyClass(ByteWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setMapperClass(DBFilter.class);
  job.setReducerClass(SolrDeleter.class);

  JobClient.runJob(job);

  long end = System.currentTimeMillis();
  LOG.info("SolrClean: finished at " + sdf.format(end) + ", elapsed: "
      + TimingUtil.elapsedTime(start, end));
}
Example #3
Source File: InfrastructureAnalyzer.java From systemds with Apache License 2.0 | 6 votes |
/**
 * Analyzes properties of hadoop cluster and configuration.
 */
private static void analyzeHadoopCluster() {
  try {
    JobConf job = ConfigurationManager.getCachedJobConf();
    JobClient client = new JobClient(job);
    ClusterStatus stat = client.getClusterStatus();
    if( stat != null ) { //if in cluster mode
      //analyze cluster status
      _remotePar = stat.getTaskTrackers();
      _remoteParMap = stat.getMaxMapTasks();
      _remoteParReduce = stat.getMaxReduceTasks();

      //analyze pure configuration properties
      analyzeHadoopConfiguration();
    }
  }
  catch (IOException e) {
    throw new RuntimeException("Unable to analyze infrastructure.",e);
  }
}
Example #4
Source File: TeraValidate.java From RDFS with Apache License 2.0 | 6 votes |
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
Example #5
Source File: DFSGeneralTest.java From RDFS with Apache License 2.0 | 6 votes |
private void verifyFiles(FileSystem fs) throws IOException {
  Path inputPath = new Path(input, "filelists");
  Path outputPath = new Path(dfs_output, "verify_results");
  if (!fs.exists(inputPath)) {
    System.out.println("Couldn't find " + inputPath + " Skip verification.");
    return;
  }
  System.out.println("-------------------");
  System.out.println("VERIFY FILES");
  System.out.println("-------------------");

  JobConf conf = new JobConf(fsConfig, DFSGeneralTest.class);
  conf.set(THREAD_CLASS_KEY, "org.apache.hadoop.mapred.GenReaderThread");
  testtype = GenReaderThread.TEST_TYPE;
  conf.set(TEST_TYPE_KEY, testtype);
  conf.setMapperClass(GenMapper.class);
  conf.setReducerClass(GenReduce.class);
  conf.setJobName(getUniqueName("gentest-verify-" + testtype));
  output = getUniqueName(OUTPUT + testtype);
  updateJobConf(conf, inputPath, outputPath);

  long startTime = System.currentTimeMillis();
  JobClient.runJob(conf);
  long endTime = System.currentTimeMillis();
  printResult(fs, new Path(output, "results"), startTime, endTime);
}
Example #6
Source File: HadoopSecurityManager_H_1_0.java From azkaban-plugins with Apache License 2.0 | 6 votes |
private void cancelMRJobTrackerToken(
    final Token<? extends TokenIdentifier> t, String userToProxy)
    throws HadoopSecurityManagerException {
  try {
    getProxiedUser(userToProxy).doAs(new PrivilegedExceptionAction<Void>() {
      @SuppressWarnings("unchecked")
      @Override
      public Void run() throws Exception {
        cancelToken((Token<DelegationTokenIdentifier>) t);
        return null;
      }

      private void cancelToken(Token<DelegationTokenIdentifier> jt)
          throws IOException, InterruptedException {
        JobConf jc = new JobConf(conf);
        JobClient jobClient = new JobClient(jc);
        jobClient.cancelDelegationToken(jt);
      }
    });
  } catch (Exception e) {
    e.printStackTrace();
    throw new HadoopSecurityManagerException("Failed to cancel Token. "
        + e.getMessage() + e.getCause());
  }
}
Example #7
Source File: TestDFSIO.java From hadoop with Apache License 2.0 | 6 votes |
private void runIOTest(
    Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
    Path outputDir) throws IOException {
  JobConf job = new JobConf(config, TestDFSIO.class);

  FileInputFormat.setInputPaths(job, getControlDir(config));
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);

  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #8
Source File: TestDatamerge.java From hadoop with Apache License 2.0 | 6 votes |
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Example #9
Source File: Main.java From hiped2 with Apache License 2.0 | 6 votes |
public static void main(String... args) throws Exception {
  JobConf job = new JobConf();
  job.setJarByClass(Main.class);

  String input = args[0];
  Path output = new Path(args[1]);

  output.getFileSystem(job).delete(output, true);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(TextTaggedMapOutput.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(job, input);
  FileOutputFormat.setOutputPath(job, output);

  JobClient.runJob(job);
}
Example #10
Source File: TestNoJobSetupCleanup.java From RDFS with Apache License 2.0 | 6 votes |
private Job submitAndValidateJob(JobConf conf, int numMaps, int numReds)
    throws IOException, InterruptedException, ClassNotFoundException {
  conf.setJobSetupCleanupNeeded(false);
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir,
      numMaps, numReds);
  job.setOutputFormatClass(MyOutputFormat.class);
  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  JobID jobid = (org.apache.hadoop.mapred.JobID)job.getID();
  JobClient jc = new JobClient(conf);
  assertTrue(jc.getSetupTaskReports(jobid).length == 0);
  assertTrue(jc.getCleanupTaskReports(jobid).length == 0);
  assertTrue(jc.getMapTaskReports(jobid).length == numMaps);
  assertTrue(jc.getReduceTaskReports(jobid).length == numReds);

  FileSystem fs = FileSystem.get(conf);
  assertTrue("Job output directory doesn't exit!", fs.exists(outDir));
  FileStatus[] list = fs.listStatus(outDir, new OutputFilter());
  int numPartFiles = numReds == 0 ? numMaps : numReds;
  assertTrue("Number of part-files is " + list.length + " and not "
      + numPartFiles, list.length == numPartFiles);
  return job;
}
Example #11
Source File: MapReduceLauncher.java From spork with Apache License 2.0 | 6 votes |
@Override
public void killJob(String jobID, Configuration conf) throws BackendException {
  try {
    if (conf != null) {
      JobConf jobConf = new JobConf(conf);
      JobClient jc = new JobClient(jobConf);
      JobID id = JobID.forName(jobID);
      RunningJob job = jc.getJob(id);
      if (job == null)
        System.out.println("Job with id " + jobID + " is not active");
      else {
        job.killJob();
        log.info("Kill " + id + " submitted.");
      }
    }
  } catch (IOException e) {
    throw new BackendException(e);
  }
}
Example #12
Source File: DataJoinJob.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Submit/run a map/reduce job.
 *
 * @param job
 * @return true for success
 * @throws IOException
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean sucess = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
      }
      running = jc.getJob(jobId);
    }
    sucess = running.isSuccessful();
  } finally {
    if (!sucess && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return sucess;
}
Example #13
Source File: LineIndexer.java From attic-apex-malhar with Apache License 2.0 | 6 votes |
/**
 * The actual main() method for our program; this is the
 * "driver" for the MapReduce job.
 */
public static void main(String[] args) {
  JobClient client = new JobClient();
  JobConf conf = new JobConf(LineIndexer.class);

  conf.setJobName("LineIndexer");
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  FileInputFormat.addInputPath(conf, new Path("input"));
  FileOutputFormat.setOutputPath(conf, new Path("output"));
  conf.setMapperClass(LineIndexMapper.class);
  conf.setReducerClass(LineIndexReducer.class);

  client.setConf(conf);
  try {
    JobClient.runJob(conf);
  } catch (Exception e) {
    e.printStackTrace();
  }
}
Example #14
Source File: TestDatamerge.java From big-c with Apache License 2.0 | 6 votes |
public void testEmptyJoin() throws Exception {
  JobConf job = new JobConf();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  Path[] src = { new Path(base,"i0"), new Path("i1"), new Path("i2") };
  job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer",
      Fake_IF.class, src));
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(IdentityMapper.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputKeyClass(IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);

  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Example #15
Source File: TestDatamerge.java From hadoop-gpu with Apache License 2.0 | 6 votes |
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapred.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Example #16
Source File: DataJoinJob.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Submit/run a map/reduce job.
 *
 * @param job
 * @return true for success
 * @throws IOException
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean sucess = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
      }
      running = jc.getJob(jobId);
    }
    sucess = running.isSuccessful();
  } finally {
    if (!sucess && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return sucess;
}
Example #17
Source File: ReadIopsCalculator.java From emr-dynamodb-connector with Apache License 2.0 | 6 votes |
public ReadIopsCalculator(JobClient jobClient, DynamoDBClient dynamoDBClient,
    String tableName, int totalSegments, int localSegments) {
  this.jobConf = (JobConf) jobClient.getConf();
  this.jobClient = jobClient;
  this.dynamoDBClient = dynamoDBClient;
  this.tableName = tableName;
  this.totalSegments = totalSegments;
  this.localSegments = localSegments;

  this.throughputPercent = Double.parseDouble(jobConf.get(DynamoDBConstants
      .THROUGHPUT_READ_PERCENT, DynamoDBConstants.DEFAULT_THROUGHPUT_PERCENTAGE));

  log.info("Table name: " + tableName);
  log.info("Throughput percent: " + throughputPercent);
}
Example #18
Source File: WordCountInput.java From aerospike-hadoop with Apache License 2.0 | 6 votes |
public int run(final String[] args) throws Exception {
  log.info("run starting");

  final Configuration conf = getConf();

  JobConf job = new JobConf(conf, WordCountInput.class);
  job.setJobName("AerospikeWordCountInput");

  job.setInputFormat(AerospikeInputFormat.class);
  job.setMapperClass(Map.class);
  job.setCombinerClass(Reduce.class);
  job.setReducerClass(Reduce.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setOutputFormat(TextOutputFormat.class);

  FileOutputFormat.setOutputPath(job, new Path(args[0]));

  JobClient.runJob(job);

  log.info("finished");
  return 0;
}
Example #19
Source File: TestDatamerge.java From RDFS with Apache License 2.0 | 6 votes |
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapred.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Example #20
Source File: TeraValidate.java From hadoop-gpu with Apache License 2.0 | 6 votes |
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
Example #21
Source File: NNBench.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Run the test
 *
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);

  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);

  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);

  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
Example #22
Source File: JobStatusProvider.java From ankush with GNU Lesser General Public License v3.0 | 6 votes |
public Map<String, Object> getJobDetails(JobClient jobClient, String jobId)
    throws AnkushException {
  String errMsg = "Unable to getch Hadoop jobs details, could not connect to Hadoop JobClient.";
  try {
    if (jobClient != null) {
      // Get the jobs that are submitted.
      JobStatus[] jobStatus = jobClient.getAllJobs();
      for (JobStatus jobSts : jobStatus) {
      }
    }
  } catch (Exception e) {
    HadoopUtils.addAndLogError(this.LOG, this.clusterConfig, errMsg,
        Constant.Component.Name.HADOOP, e);
    throw new AnkushException(errMsg);
  }
  return null;
}
Example #23
Source File: TestTableMapReduceUtil.java From hbase with Apache License 2.0 | 6 votes |
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(1);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
        jobConf);
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
        ClassificatorRowReduce.class, jobConf);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null)
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
  }
}
Example #24
Source File: NNBench.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Run the test
 *
 * @throws IOException on error
 */
public static void runTests(Configuration config) throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);

  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);

  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);

  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
Example #25
Source File: SimplePigStats.java From spork with Apache License 2.0 | 6 votes |
void initialize(PigContext pigContext, JobClient jobClient,
    JobControlCompiler jcc, MROperPlan mrPlan) {
  super.start();
  if (pigContext == null || jobClient == null || jcc == null) {
    LOG.warn("invalid params: " + pigContext + jobClient + jcc);
    return;
  }
  this.pigContext = pigContext;
  this.jobClient = jobClient;
  this.jcc = jcc;

  // build job DAG with job ids assigned to null
  try {
    new JobGraphBuilder(mrPlan).visit();
  } catch (VisitorException e) {
    LOG.warn("unable to build job plan", e);
  }
}
Example #26
Source File: Statistics.java From big-c with Apache License 2.0 | 5 votes |
public Statistics(
    final Configuration conf, int pollingInterval, CountDownLatch startFlag)
    throws IOException, InterruptedException {
  UserGroupInformation ugi = UserGroupInformation.getLoginUser();
  this.cluster = ugi.doAs(new PrivilegedExceptionAction<JobClient>() {
    public JobClient run() throws IOException {
      return new JobClient(new JobConf(conf));
    }
  });

  this.jtPollingInterval = pollingInterval;
  maxJobCompletedInInterval = conf.getInt(
      MAX_JOBS_COMPLETED_IN_POLL_INTERVAL_KEY, 1);
  this.startFlag = startFlag;
}
Example #27
Source File: BusyLegs.java From gemfirexd-oss with Apache License 2.0 | 5 votes |
public int run(String[] args) throws Exception {
  GfxdDataSerializable.initTypes();

  JobConf conf = new JobConf(getConf());
  conf.setJobName("Busy Leg Count");

  Path outputPath = new Path(args[0]);
  String hdfsHomeDir = args[1];
  String tableName = args[2];

  outputPath.getFileSystem(conf).delete(outputPath, true);

  conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
  conf.set(RowInputFormat.INPUT_TABLE, tableName);
  conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);

  // Configure Mapper
  conf.setInputFormat(RowInputFormat.class);
  conf.setMapperClass(SampleMapper.class);
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(IntWritable.class);

  // Configure Reducer
  conf.setReducerClass(SampleReducer.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  FileOutputFormat.setOutputPath(conf, outputPath);

  JobClient.runJob(conf);
  return 0;
}
Example #28
Source File: SessionRollup.java From aerospike-hadoop with Apache License 2.0 | 5 votes |
public int run(final String[] args) throws Exception {
  log.info("run starting");

  final Configuration conf = getConf();

  JobConf job = new JobConf(conf, SessionRollup.class);
  job.setJobName("AerospikeSessionRollup");

  job.setMapperClass(Map.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(LongWritable.class);

  // job.setCombinerClass(Reduce.class); // Reduce changes format.

  job.setReducerClass(Reduce.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Session.class);

  job.setOutputFormat(SessionOutputFormat.class);

  for (int ii = 0; ii < args.length; ++ii)
    FileInputFormat.addInputPath(job, new Path(args[ii]));

  JobClient.runJob(job);

  log.info("finished");
  return 0;
}
Example #29
Source File: LinkRank.java From anthelion with Apache License 2.0 | 5 votes |
/**
 * Runs the link analysis job. The link analysis job applies the link rank
 * formula to create a score per url and stores that score in the NodeDb.
 *
 * Typically the link analysis job is run a number of times to allow the link
 * rank scores to converge.
 *
 * @param nodeDb The node database from which we are getting previous link
 * rank scores.
 * @param inverted The inverted inlinks
 * @param output The link analysis output.
 * @param iteration The current iteration number.
 * @param numIterations The total number of link analysis iterations
 *
 * @throws IOException If an error occurs during link analysis.
 */
private void runAnalysis(Path nodeDb, Path inverted, Path output,
    int iteration, int numIterations, float rankOne) throws IOException {

  JobConf analyzer = new NutchJob(getConf());
  analyzer.set("link.analyze.iteration", String.valueOf(iteration + 1));
  analyzer.setJobName("LinkAnalysis Analyzer, iteration " + (iteration + 1)
      + " of " + numIterations);
  FileInputFormat.addInputPath(analyzer, nodeDb);
  FileInputFormat.addInputPath(analyzer, inverted);
  FileOutputFormat.setOutputPath(analyzer, output);
  analyzer.set("link.analyze.rank.one", String.valueOf(rankOne));
  analyzer.setMapOutputKeyClass(Text.class);
  analyzer.setMapOutputValueClass(ObjectWritable.class);
  analyzer.setInputFormat(SequenceFileInputFormat.class);
  analyzer.setMapperClass(Analyzer.class);
  analyzer.setReducerClass(Analyzer.class);
  analyzer.setOutputKeyClass(Text.class);
  analyzer.setOutputValueClass(Node.class);
  analyzer.setOutputFormat(MapFileOutputFormat.class);
  analyzer.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  LOG.info("Starting analysis job");
  try {
    JobClient.runJob(analyzer);
  } catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }
  LOG.info("Finished analysis job.");
}
Example #30
Source File: TestGridmixSummary.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Test {@link ClusterSummarizer}.
 */
@Test (timeout=20000)
public void testClusterSummarizer() throws IOException {
  ClusterSummarizer cs = new ClusterSummarizer();
  Configuration conf = new Configuration();

  String jt = "test-jt:1234";
  String nn = "test-nn:5678";
  conf.set(JTConfig.JT_IPC_ADDRESS, jt);
  conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, nn);
  cs.start(conf);
  assertEquals("JT name mismatch", jt, cs.getJobTrackerInfo());
  assertEquals("NN name mismatch", nn, cs.getNamenodeInfo());

  ClusterStats cStats = ClusterStats.getClusterStats();
  conf.set(JTConfig.JT_IPC_ADDRESS, "local");
  conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "local");
  JobClient jc = new JobClient(conf);
  cStats.setClusterMetric(jc.getClusterStatus());

  cs.update(cStats);

  // test
  assertEquals("Cluster summary test failed!", 1, cs.getMaxMapTasks());
  assertEquals("Cluster summary test failed!", 1, cs.getMaxReduceTasks());
  assertEquals("Cluster summary test failed!", 1, cs.getNumActiveTrackers());
  assertEquals("Cluster summary test failed!", 0,
      cs.getNumBlacklistedTrackers());
}