Java Code Examples for org.apache.hadoop.mapred.JobClient

The following examples show how to use org.apache.hadoop.mapred.JobClient. They are extracted from open source projects; each example lists its source project, source file, and license.
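As a quick orientation before the extracted examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the two common ways JobClient is used with the old mapred API: the blocking JobClient.runJob(JobConf) helper, and an explicit JobClient instance with submitJob() followed by monitoring. The class name, paths, and identity mapper/reducer are placeholders chosen for illustration.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class JobClientSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(JobClientSketch.class);
    job.setJobName("jobclient-sketch");

    // Placeholder classes and paths; real jobs plug in their own mapper/reducer.
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(IdentityReducer.class);
    // With the default TextInputFormat, the identity map/reduce emits <LongWritable, Text>.
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Pattern 1: blocking convenience call, used by most of the examples below.
    // JobClient.runJob(job);

    // Pattern 2: explicit client -- submit, wait, then inspect the result.
    JobClient client = new JobClient(job);
    try {
      RunningJob running = client.submitJob(job);
      running.waitForCompletion();
      System.out.println("Job " + running.getID() + " successful: " + running.isSuccessful());
    } finally {
      client.close();
    }
  }
}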
Example 1
Source Project: hadoop   Source File: NNBench.java    License: Apache License 2.0
/**
 * Run the test
 * 
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);
  
  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
 
Example 2
Source Project: hadoop-gpu   Source File: TestDatamerge.java    License: Apache License 2.0
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapred.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
 
Example 3
Source Project: hadoop   Source File: TestDFSIO.java    License: Apache License 2.0
private void runIOTest(
        Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass, 
        Path outputDir) throws IOException {
  JobConf job = new JobConf(config, TestDFSIO.class);

  FileInputFormat.setInputPaths(job, getControlDir(config));
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);

  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Example 4
Source Project: hadoop   Source File: TestDatamerge.java    License: Apache License 2.0
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
 
Example 5
Source Project: RDFS   Source File: DFSGeneralTest.java    License: Apache License 2.0
private void verifyFiles(FileSystem fs) 
    throws IOException {
  Path inputPath = new Path(input, "filelists");
  Path outputPath = new Path(dfs_output, "verify_results");
  if (!fs.exists(inputPath)) {
    System.out.println("Couldn't find " + inputPath + " Skip verification.");
    return;
  }
  System.out.println("-------------------");
  System.out.println("VERIFY FILES");
  System.out.println("-------------------");
  JobConf conf = new JobConf(fsConfig, DFSGeneralTest.class);
  conf.set(THREAD_CLASS_KEY, "org.apache.hadoop.mapred.GenReaderThread");
  testtype = GenReaderThread.TEST_TYPE;
  conf.set(TEST_TYPE_KEY, testtype);
  conf.setMapperClass(GenMapper.class);
  conf.setReducerClass(GenReduce.class);
  conf.setJobName(getUniqueName("gentest-verify-" + testtype));
  output = getUniqueName(OUTPUT + testtype);
  updateJobConf(conf, inputPath, outputPath);
  long startTime = System.currentTimeMillis();
  JobClient.runJob(conf);
  long endTime = System.currentTimeMillis();
  printResult(fs, new Path(output, "results"), startTime, endTime);
}
 
Example 6
private void cancelMRJobTrackerToken(
    final Token<? extends TokenIdentifier> t, String userToProxy)
    throws HadoopSecurityManagerException {
  try {
    getProxiedUser(userToProxy).doAs(new PrivilegedExceptionAction<Void>() {
      @SuppressWarnings("unchecked")
      @Override
      public Void run() throws Exception {
        cancelToken((Token<DelegationTokenIdentifier>) t);
        return null;
      }

      private void cancelToken(Token<DelegationTokenIdentifier> jt)
          throws IOException, InterruptedException {
        JobConf jc = new JobConf(conf);
        JobClient jobClient = new JobClient(jc);
        jobClient.cancelDelegationToken(jt);
      }
    });
  } catch (Exception e) {
    e.printStackTrace();
    throw new HadoopSecurityManagerException("Failed to cancel Token. "
        + e.getMessage() + e.getCause());
  }
}
 
Example 7
Source Project: RDFS   Source File: TeraValidate.java    License: Apache License 2.0
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split 
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
 
Example 8
Source Project: hiped2   Source File: Main.java    License: Apache License 2.0
public static void main(String... args) throws Exception {

    JobConf job = new JobConf();
    job.setJarByClass(Main.class);

    String input = args[0];
    Path output = new Path(args[1]);

    output.getFileSystem(job).delete(output, true);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(TextTaggedMapOutput.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    JobClient.runJob(job);
  }
 
Example 9
Source Project: RDFS   Source File: TestNoJobSetupCleanup.java    License: Apache License 2.0
private Job submitAndValidateJob(JobConf conf, int numMaps, int numReds)
    throws IOException, InterruptedException, ClassNotFoundException {
  conf.setJobSetupCleanupNeeded(false);
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir,
              numMaps, numReds);

  job.setOutputFormatClass(MyOutputFormat.class);
  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());
  JobID jobid = (org.apache.hadoop.mapred.JobID)job.getID();

  JobClient jc = new JobClient(conf);
  assertTrue(jc.getSetupTaskReports(jobid).length == 0);
  assertTrue(jc.getCleanupTaskReports(jobid).length == 0);
  assertTrue(jc.getMapTaskReports(jobid).length == numMaps);
  assertTrue(jc.getReduceTaskReports(jobid).length == numReds);
  FileSystem fs = FileSystem.get(conf);
  assertTrue("Job output directory doesn't exit!", fs.exists(outDir));
  FileStatus[] list = fs.listStatus(outDir, new OutputFilter());
  int numPartFiles = numReds == 0 ? numMaps : numReds;
  assertTrue("Number of part-files is " + list.length + " and not "
      + numPartFiles, list.length == numPartFiles);
  return job;
}
 
Example 10
Source Project: spork   Source File: MapReduceLauncher.java    License: Apache License 2.0
@Override
public void killJob(String jobID, Configuration conf) throws BackendException {
    try {
        if (conf != null) {
            JobConf jobConf = new JobConf(conf);
            JobClient jc = new JobClient(jobConf);
            JobID id = JobID.forName(jobID);
            RunningJob job = jc.getJob(id);
            if (job == null)
                System.out.println("Job with id " + jobID + " is not active");
            else
            {
                job.killJob();
                log.info("Kill " + id + " submitted.");
            }
        }
    } catch (IOException e) {
        throw new BackendException(e);
    }
}
 
Example 11
Source Project: big-c   Source File: DataJoinJob.java    License: Apache License 2.0
/**
 * Submit/run a map/reduce job.
 * 
 * @param job
 * @return true for success
 * @throws IOException
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean success = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
      }
      running = jc.getJob(jobId);
    }
    success = running.isSuccessful();
  } finally {
    if (!sucess && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return success;
}
 
Example 12
Source Project: attic-apex-malhar   Source File: LineIndexer.java    License: Apache License 2.0
/**
 * The actual main() method for our program; this is the
 * "driver" for the MapReduce job.
 */
public static void main(String[] args)
{
  JobClient client = new JobClient();
  JobConf conf = new JobConf(LineIndexer.class);

  conf.setJobName("LineIndexer");

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.addInputPath(conf, new Path("input"));
  FileOutputFormat.setOutputPath(conf, new Path("output"));

  conf.setMapperClass(LineIndexMapper.class);
  conf.setReducerClass(LineIndexReducer.class);

  client.setConf(conf);

  try {
    JobClient.runJob(conf);
  } catch (Exception e) {
    e.printStackTrace();
  }
}
 
Example 13
Source Project: spork   Source File: SimplePigStats.java    License: Apache License 2.0
void initialize(PigContext pigContext, JobClient jobClient,
        JobControlCompiler jcc, MROperPlan mrPlan) {
    super.start();

    if (pigContext == null || jobClient == null || jcc == null) {
        LOG.warn("invalid params: " + pigContext + jobClient + jcc);
        return;
    }

    this.pigContext = pigContext;
    this.jobClient = jobClient;
    this.jcc = jcc;

    // build job DAG with job ids assigned to null
    try {
        new JobGraphBuilder(mrPlan).visit();
    } catch (VisitorException e) {
        LOG.warn("unable to build job plan", e);
    }
}
 
Example 14
Source Project: anthelion   Source File: SolrClean.java    License: Apache License 2.0
public void delete(String crawldb, String solrUrl, boolean noCommit) throws IOException {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("SolrClean: starting at " + sdf.format(start));

  JobConf job = new NutchJob(getConf());

  FileInputFormat.addInputPath(job, new Path(crawldb, CrawlDb.CURRENT_NAME));
  job.setBoolean("noCommit", noCommit);
  job.set(SolrConstants.SERVER_URL, solrUrl);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapOutputKeyClass(ByteWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setMapperClass(DBFilter.class);
  job.setReducerClass(SolrDeleter.class);

  JobClient.runJob(job);

  long end = System.currentTimeMillis();
  LOG.info("SolrClean: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
}
 
Example 15
Source Project: big-c   Source File: TestDatamerge.java    License: Apache License 2.0
public void testEmptyJoin() throws Exception {
  JobConf job = new JobConf();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  Path[] src = { new Path(base,"i0"), new Path("i1"), new Path("i2") };
  job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer",
      Fake_IF.class, src));
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(IdentityMapper.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputKeyClass(IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);

  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
 
Example 16
Source Project: RDFS   Source File: NNBench.java    License: Apache License 2.0
/**
 * Run the test
 * 
 * @throws IOException on error
 */
public static void runTests(Configuration config) throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);
  
  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
 
Example 17
Source Project: hbase   Source File: TestTableMapReduceUtil.java    License: Apache License 2.0
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(1);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
        jobConf);
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
        ClassificatorRowReduce.class, jobConf);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null)
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
  }
}
 
Example 18
public Map<String, Object> getJobDetails(JobClient jobClient, String jobId)
		throws AnkushException {
	String errMsg = "Unable to getch Hadoop jobs details, could not connect to Hadoop JobClient.";
	try {
		if (jobClient != null) {
			// Get the jobs that are submitted.
			JobStatus[] jobStatus = jobClient.getAllJobs();
			for (JobStatus jobSts : jobStatus) {

			}
		}
	} catch (Exception e) {
		HadoopUtils.addAndLogError(this.LOG, this.clusterConfig, errMsg,
				Constant.Component.Name.HADOOP, e);
		throw new AnkushException(errMsg);
	}
	return null;
}
 
Example 19
Source Project: big-c   Source File: NNBench.java    License: Apache License 2.0
/**
 * Run the test
 * 
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);
  
  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
 
Example 20
Source Project: hadoop-gpu   Source File: TeraValidate.java    License: Apache License 2.0
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split 
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
 
Example 21
Source Project: RDFS   Source File: TestDatamerge.java    License: Apache License 2.0
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapred.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
 
Example 22
Source Project: aerospike-hadoop   Source File: WordCountInput.java    License: Apache License 2.0
public int run(final String[] args) throws Exception {

        log.info("run starting");

        final Configuration conf = getConf();

        JobConf job = new JobConf(conf, WordCountInput.class);
        job.setJobName("AerospikeWordCountInput");

        job.setInputFormat(AerospikeInputFormat.class);
        job.setMapperClass(Map.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setOutputFormat(TextOutputFormat.class);

        FileOutputFormat.setOutputPath(job, new Path(args[0]));

        JobClient.runJob(job);

        log.info("finished");
        return 0;
    }
 
Example 23
Source Project: emr-dynamodb-connector   Source File: ReadIopsCalculator.java    License: Apache License 2.0
public ReadIopsCalculator(JobClient jobClient, DynamoDBClient dynamoDBClient, String tableName,
    int totalSegments, int localSegments) {
  this.jobConf = (JobConf) jobClient.getConf();
  this.jobClient = jobClient;

  this.dynamoDBClient = dynamoDBClient;
  this.tableName = tableName;
  this.totalSegments = totalSegments;
  this.localSegments = localSegments;

  this.throughputPercent = Double.parseDouble(jobConf.get(DynamoDBConstants
      .THROUGHPUT_READ_PERCENT, DynamoDBConstants.DEFAULT_THROUGHPUT_PERCENTAGE));

  log.info("Table name: " + tableName);
  log.info("Throughput percent: " + throughputPercent);
}
 
Example 24
Source Project: hadoop-gpu   Source File: DataJoinJob.java    License: Apache License 2.0
/**
 * Submit/run a map/reduce job.
 * 
 * @param job
 * @return true for success
 * @throws IOException
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean success = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
      }
      running = jc.getJob(jobId);
    }
    success = running.isSuccessful();
  } finally {
    if (!sucess && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return success;
}
 
Example 25
Source Project: systemds   Source File: InfrastructureAnalyzer.java    License: Apache License 2.0
/**
 * Analyzes properties of hadoop cluster and configuration.
 */
private static void analyzeHadoopCluster() {
	try {
		JobConf job = ConfigurationManager.getCachedJobConf();
		JobClient client = new JobClient(job);
		ClusterStatus stat = client.getClusterStatus();
		if( stat != null ) { //if in cluster mode
			//analyze cluster status
			_remotePar = stat.getTaskTrackers();
			_remoteParMap = stat.getMaxMapTasks(); 
			_remoteParReduce = stat.getMaxReduceTasks(); 
			
			//analyze pure configuration properties
			analyzeHadoopConfiguration();
		}
	} 
	catch (IOException e) {
		throw new RuntimeException("Unable to analyze infrastructure.",e);
	}
}
 
Example 26
Source Project: RDFS   Source File: TeraSort.java    License: Apache License 2.0
public int run(String[] args) throws Exception {
  LOG.info("starting");
  JobConf job = (JobConf) getConf();
  Path inputDir = new Path(args[0]);
  inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
  Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
  URI partitionUri = new URI(partitionFile.toString() +
                             "#" + TeraInputFormat.PARTITION_FILENAME);
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(TeraInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TeraInputFormat.writePartitionFile(job, partitionFile);
  DistributedCache.addCacheFile(partitionUri, job);
  DistributedCache.createSymlink(job);
  job.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(job, true);
  long startTime = System.currentTimeMillis();
  JobClient.runJob(job);
  long endTime = System.currentTimeMillis();
  System.out.println((float)(endTime-startTime)/1000);
  LOG.info("done");
  return 0;
}
 
Example 27
Source Project: RDFS   Source File: AggregateWordCount.java    License: Apache License 2.0
/**
 * The main driver for word count map/reduce program. Invoke this method to
 * submit the map/reduce job.
 * 
 * @throws IOException
 *           When there is communication problems with the job tracker.
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException {
  JobConf conf = ValueAggregatorJob.createValueAggregatorJob(args
      , new Class[] {WordCountPlugInClass.class});
 
  JobClient.runJob(conf);
}
 
Example 28
Source Project: hadoop   Source File: TestEncryptedShuffle.java    License: Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts)
  throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
      KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
                                    useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
      new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();

    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
 
Example 29
Source Project: hadoop   Source File: TestMROldApiJobs.java    License: Apache License 2.0
static boolean runJob(JobConf conf, Path inDir, Path outDir, int numMaps, 
                         int numReds) throws IOException, InterruptedException {

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }
  String input = "The quick brown fox\n" + "has many silly\n"
      + "red fox sox\n";
  for (int i = 0; i < numMaps; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }

  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf, fs);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  JobClient jobClient = new JobClient(conf);
  
  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
 
Example 30
Source Project: hbase-indexer   Source File: BatchStateUpdater.java    License: Apache License 2.0
protected BatchStateUpdater(String indexerName, WriteableIndexerModel indexerModel, JobClient jobClient,
                  ScheduledExecutorService executor, int pollInterval) {
    this.indexerModel = indexerModel;
    this.jobClient = jobClient;
    this.executor = executor;
    this.pollInterval = pollInterval;
    this.indexerName = indexerName;
}