org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil Java Examples

The following examples show how to use org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: TransformBaseRunner.java From BigDataPlatform with GNU General Public License v3.0

6 votes

protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);

    job.setJarByClass(this.runnerClass);
    // 本地运行
//    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, true);
    // 集群运行：本地提交和打包(jar)提交
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    // this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    // job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
  }

Example #2

Source File: LeastRecentlyUsedPruner.java From metron with Apache License 2.0

6 votes

public static void setupHBaseJob(Job job, String sourceTable, String cf) throws IOException {
        Scan scan = new Scan();
        if(cf != null) {
            scan.addFamily(Bytes.toBytes(cf));
        }
        scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false);  // don't set to true for MR jobs
// set other scan attrs

        TableMapReduceUtil.initTableMapperJob(
                sourceTable,      // input table
                scan,	          // Scan instance to control CF and attribute selection
                PrunerMapper.class,   // mapper class
                null,	          // mapper output key
                null,	          // mapper output value
                job);
        TableMapReduceUtil.initTableReducerJob(
                sourceTable,      // output table
                null,             // reducer class
                job);
    }

Example #3

Source File: IntegrationTestLoadAndVerify.java From hbase with Apache License 2.0

6 votes

protected Job doLoad(Configuration conf, TableDescriptor tableDescriptor) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "load-output");
  LOG.info("Load output dir: " + outputDir);

  NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
  conf.set(TABLE_NAME_KEY, tableDescriptor.getTableName().getNameAsString());

  Job job = Job.getInstance(conf);
  job.setJobName(TEST_NAME + " Load for " + tableDescriptor.getTableName());
  job.setJarByClass(this.getClass());
  setMapperClass(job);
  job.setInputFormatClass(NMapInputFormat.class);
  job.setNumReduceTasks(0);
  setJobScannerConf(job);
  FileOutputFormat.setOutputPath(job, outputDir);

  TableMapReduceUtil.addDependencyJars(job);

  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);
  assertTrue(job.waitForCompletion(true));
  return job;
}

Example #4

Source File: TableInputFormatTest.java From hgraphdb with Apache License 2.0

6 votes

private void runTestOnTable() throws InterruptedException, ClassNotFoundException {
    Job job = null;
    try {
        Configuration conf = graph.configuration().toHBaseConfiguration();
        job = Job.getInstance(conf, "test123");
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setNumReduceTasks(0);
        Scan scan = new Scan();
        scan.addColumn(FAMILY_NAME, COLUMN_NAME);
        scan.setTimeRange(MINSTAMP, MAXSTAMP);
        scan.setMaxVersions();
        TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(),
                scan, ProcessTimeRangeMapper.class, Text.class, Text.class, job,
                true, TableInputFormat.class);
        job.waitForCompletion(true);
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } finally {
        if (job != null) {
            FileUtil.fullyDelete(
                    new File(job.getConfiguration().get("hadoop.tmp.dir")));
        }
    }
}

Example #5

Source File: IntegrationTestLoadAndVerify.java From hbase with Apache License 2.0

6 votes

protected void doVerify(Configuration conf, TableDescriptor tableDescriptor) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  LOG.info("Verify output dir: " + outputDir);

  Job job = Job.getInstance(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + tableDescriptor.getTableName());
  setJobScannerConf(job);

  Scan scan = new Scan();

  TableMapReduceUtil.initTableMapperJob(
      tableDescriptor.getTableName().getNameAsString(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);

  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));

  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}

Example #6

Source File: CellCounter.java From cloud-bigtable-examples with Apache License 2.0

6 votes

/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
    throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  String reportSeparatorString = (args.length > 2) ? args[2]: ":";
  conf.set("ReportSeparator", reportSeparatorString);
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(CellCounter.class);
  Scan scan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(tableName, scan,
      CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setReducerClass(IntSumReducer.class);
  return job;
}

Example #7

Source File: HFileOutputFormat3.java From kylin with Apache License 2.0

6 votes

public static void configureIncrementalLoadMap(Job job, Table table) throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormat3.class);

    // Set compression algorithms based on column families
    configureCompression(conf, table.getTableDescriptor());
    configureBloomType(table.getTableDescriptor(), conf);
    configureBlockSize(table.getTableDescriptor(), conf);
    HTableDescriptor tableDescriptor = table.getTableDescriptor();
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + table.getName() + " output configured.");
}

Example #8

Source File: UpdateClusterJob.java From recsys-offline with Apache License 2.0

6 votes

public void run() {

		try {
			Job job = Job.getInstance(HBaseContext.config, "UpdateClusterJob");
			job.setJarByClass(UpdateClusterJob.class);

			Scan scan = new Scan();
			scan.setCaching(500);
			scan.setCacheBlocks(false);
			TableMapReduceUtil.initTableMapperJob(
					Constants.hbase_cluster_model_table, scan,
					HBaseReadMapper.class, Text.class, Text.class, job);
			TableMapReduceUtil.initTableReducerJob(
					Constants.hbase_cluster_model_table,
					HBaseWriteReducer.class, job);
			job.setNumReduceTasks(4);

			boolean b = job.waitForCompletion(true);
			if (!b) {
				throw new IOException("error with job!");
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

Example #9

Source File: IndexScrutinyTool.java From phoenix with Apache License 2.0

6 votes

private Job configureSubmittableJob(Job job, Path outputPath, Class<IndexScrutinyMapperForTest> mapperClass) throws Exception {
    Configuration conf = job.getConfiguration();
    conf.setBoolean("mapreduce.job.user.classpath.first", true);
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setJarByClass(IndexScrutinyTool.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (outputInvalidRows && OutputFormat.FILE.equals(outputFormat)) {
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, outputPath);
    }
    job.setMapperClass((mapperClass == null ? IndexScrutinyMapper.class : mapperClass));
    job.setNumReduceTasks(0);
    // Set the Output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    TableMapReduceUtil.addDependencyJars(job);
    return job;
}

Example #10

Source File: IndexTool.java From phoenix with Apache License 2.0

6 votes

private Job configureSubmittableJobUsingDirectApi(Job job) throws Exception {
    job.setReducerClass(PhoenixIndexImportDirectReducer.class);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    // Set the Physical Table name for use in DirectHTableWriter#write(Mutation)
    conf.set(TableOutputFormat.OUTPUT_TABLE,
        PhoenixConfigurationUtil.getPhysicalTableName(job.getConfiguration()));
    //Set the Output classes
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    TableMapReduceUtil.addDependencyJars(job);
    job.setNumReduceTasks(1);
    return job;
}

Example #11

Source File: HBaseBulkImportJob.java From aliyun-maxcompute-data-collectors with Apache License 2.0

6 votes

@Override
protected void jobSetup(Job job) throws IOException, ImportException {
  super.jobSetup(job);

  // we shouldn't have gotten here if bulk load dir is not set
  // so let's throw a ImportException
  if(getContext().getDestination() == null){
    throw new ImportException("Can't run HBaseBulkImportJob without a " +
        "valid destination directory.");
  }

  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), Preconditions.class);
  FileOutputFormat.setOutputPath(job, getContext().getDestination());
  HTable hTable = new HTable(job.getConfiguration(), options.getHBaseTable());
  HFileOutputFormat.configureIncrementalLoad(job, hTable);
}

Example #12

Source File: HFileOutputFormat3.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

public static void configureIncrementalLoadMap(Job job, Table table) throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormat3.class);

    // Set compression algorithms based on column families
    configureCompression(conf, table.getTableDescriptor());
    configureBloomType(table.getTableDescriptor(), conf);
    configureBlockSize(table.getTableDescriptor(), conf);
    HTableDescriptor tableDescriptor = table.getTableDescriptor();
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + table.getName() + " output configured.");
}

Example #13

Source File: IntegrationTestWithCellVisibilityLoadAndVerify.java From hbase with Apache License 2.0

6 votes

private Job doVerify(Configuration conf, TableDescriptor tableDescriptor, String... auths)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + tableDescriptor.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  scan.setAuthorizations(new Authorizations(auths));
  TableMapReduceUtil.initTableMapperJob(tableDescriptor.getTableName().getNameAsString(), scan,
      VerifyMapper.class, NullWritable.class, NullWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  return job;
}

Example #14

Source File: WCRunner.java From BigDataArchitect with Apache License 2.0

6 votes

public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(true);
        conf.set("hbase.zookeeper.quorum","node04,node02,node03");
        conf.set("mapreduce.app-submission.cross-platform","true");
        conf.set("mapreduce.framework.name","local");

        //创建job对象
        Job job = Job.getInstance(conf);
        job.setJarByClass(WCRunner.class);

        //设置mapper类
        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        //设置reduce类
//        job.setReducerClass();
//        TableMapReduceUtil.initTableMapperJob();
        TableMapReduceUtil.initTableReducerJob("wc",WCReducer.class,job,null,null,null,null,false);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Put.class);

        //指定hdfs存储数据的目录
        FileInputFormat.addInputPath(job,new Path("/wc/wc"));
        job.waitForCompletion(true);
    }

Example #15

Source File: TransformerBaseRunner.java From BigDataArchitect with Apache License 2.0

6 votes

/**
 * 创建job
 * 
 * @param conf
 * @return
 * @throws IOException
 */
protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);

    job.setJarByClass(this.runnerClass);
    // 本地运行
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    // 集群运行：本地提交和打包(jar)提交
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    // this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    // job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}

Example #16

Source File: AnalyserLogDataRunner.java From BigDataArchitect with Apache License 2.0

6 votes

@Override
public int run(String[] args) throws Exception {
	Configuration conf = this.getConf();
	this.processArgs(conf, args);

	Job job = Job.getInstance(conf, "analyser_logdata");


	job.setJarByClass(AnalyserLogDataRunner.class);
	job.setMapperClass(AnalyserLogDataMapper.class);
	job.setMapOutputKeyClass(NullWritable.class);
	job.setMapOutputValueClass(Put.class);
	TableMapReduceUtil.initTableReducerJob(
			EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null,
			null, null, false);
	job.setNumReduceTasks(0);

	// 设置输入路径
	this.setJobInputPaths(job);
	return job.waitForCompletion(true) ? 0 : -1;
}

Example #17

Source File: JobFileProcessor.java From hraven with Apache License 2.0

6 votes

/**
 * @param conf to use to create and run the job
 * @param scan to be used to scan the raw table.
 * @param totalJobCount the total number of jobs that need to be run in this
 *          batch. Used in job name.
 * @return The job to be submitted to the cluster.
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private Job getProcessingJob(Configuration conf, Scan scan, int totalJobCount)
    throws IOException {

  Configuration confClone = new Configuration(conf);

  // Turn off speculative execution.
  // Note: must be BEFORE the job construction with the new mapreduce API.
  confClone.setBoolean("mapred.map.tasks.speculative.execution", false);

  // Set up job
  Job job = new Job(confClone, getJobName(totalJobCount));

  // This is a map-only class, skip reduce step
  job.setNumReduceTasks(0);
  job.setJarByClass(JobFileProcessor.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);

  TableMapReduceUtil.initTableMapperJob(Constants.HISTORY_RAW_TABLE, scan,
      JobFileTableMapper.class, JobFileTableMapper.getOutputKeyClass(),
      JobFileTableMapper.getOutputValueClass(), job);

  return job;
}

Example #18

Source File: AnalyserLogDataRunner.java From BigDataPlatform with GNU General Public License v3.0

6 votes

@Override
  public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);

    Job job = Job.getInstance(conf, "analyser_logdata");
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);

    //设置reducer配置
    //1集群上运行 打成jar运行  (要求addDependencyJars为true(默认true)
//    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job,null,
        null,null,null,true);
    //2本地运行 打成jar运行  (要求addDependencyJars为true(默认true)
//    TableMapReduceUtil
//        .initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null,
//            null, false);
    //设置输入路径
    job.setNumReduceTasks(0);
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
  }

Example #19

Source File: IntegrationTestBigLinkedList.java From hbase with Apache License 2.0

5 votes

public int runGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMultiplier, Integer numWalkers)
    throws Exception {
  LOG.info("Running Generator with numMappers=" + numMappers +", numNodes=" + numNodes);
  createSchema();
  job = Job.getInstance(getConf());

  job.setJobName("Link Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  FileInputFormat.setInputPaths(job, tmpOutput);
  job.setInputFormatClass(OneFilePerMapperSFIF.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);

  setMapperForGenerator(job);

  job.setOutputFormatClass(NullOutputFormat.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
                                                 AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}

Example #20

Source File: WordCountHBase.java From cloud-bigtable-examples with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: wordcount-hbase <in> [<in>...] <table-name>");
    System.exit(2);
  }

  Job job = Job.getInstance(conf, "word count");

  for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }

  TableName tableName = TableName.valueOf(otherArgs[otherArgs.length - 1]);
  try {
    CreateTable.createTable(tableName, conf,
        Collections.singletonList(Bytes.toString(COLUMN_FAMILY)));
  } catch (Exception e) {
    LOG.error("Could not create the table.", e);
  }

  job.setJarByClass(WordCountHBase.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setMapOutputValueClass(IntWritable.class);

  TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), MyTableReducer.class, job);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}

Example #21

Source File: PhoenixConfigurationUtil.java From phoenix with Apache License 2.0

5 votes

public static void loadHBaseConfiguration(Job job) throws IOException {
    // load hbase-site.xml
    Configuration hbaseConf = HBaseConfiguration.create();
    for (Map.Entry<String, String> entry : hbaseConf) {
        if (job.getConfiguration().get(entry.getKey()) == null) {
            job.getConfiguration().set(entry.getKey(), entry.getValue());
        }
    }
    //In order to have phoenix working on a secured cluster
    TableMapReduceUtil.initCredentials(job);
}

Example #22

Source File: MapreduceDependencyClasspathTool.java From hbase with Apache License 2.0

5 votes

@Override
public int run(String[] args) throws Exception {
  if (args.length > 0) {
    System.err.println("Usage: hbase mapredcp [-Dtmpjars=...]");
    System.err.println("  Construct a CLASSPATH containing dependency jars required to run a mapreduce");
    System.err.println("  job. By default, includes any jars detected by TableMapReduceUtils. Provide");
    System.err.println("  additional entries by specifying a comma-separated list in tmpjars.");
    return 0;
  }

  TableMapReduceUtil.addHBaseDependencyJars(getConf());
  System.out.println(TableMapReduceUtil.buildDependencyClasspath(getConf()));
  return 0;
}

Example #23

Source File: PerformanceEvaluation.java From hbase with Apache License 2.0

5 votes

/**
 * Run a mapreduce job.  Run as many maps as asked-for clients.
 * Before we start up the job, write out an input file with instruction
 * per client regards which row they are to start on.
 * @param cmd Command to run.
 */
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}

Example #24

Source File: UpdateStatisticsTool.java From phoenix with Apache License 2.0

5 votes

@Override
public int run(String[] args) throws Exception {
    parseArgs(args);
    preJobTask();
    configureJob();
    TableMapReduceUtil.initCredentials(job);
    int ret = runJob();
    postJobTask();
    return ret;
}

Example #25

Source File: UpdateStatisticsTool.java From phoenix with Apache License 2.0

5 votes

private void configureJob() throws Exception {
    job = Job.getInstance(getConf(),
            "UpdateStatistics-" + tableName + "-" + snapshotName);
    PhoenixMapReduceUtil.setInput(job, NullDBWritable.class,
            snapshotName, tableName, restoreDir);

    PhoenixConfigurationUtil.setMRJobType(job.getConfiguration(), MRJobType.UPDATE_STATS);

    // DO NOT allow mapper splits using statistics since it may result into many smaller chunks
    PhoenixConfigurationUtil.setSplitByStats(job.getConfiguration(), false);

    job.setJarByClass(UpdateStatisticsTool.class);
    job.setMapperClass(TableSnapshotMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    job.setPriority(this.jobPriority);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), PhoenixConnection.class, Chronology.class,
            CharStream.class, TransactionSystemClient.class, TransactionNotInProgressException.class,
            ZKClient.class, DiscoveryServiceClient.class, ZKDiscoveryService.class,
            Cancellable.class, TTransportException.class, SpanReceiver.class, TransactionProcessor.class, Gauge.class, MetricRegistriesImpl.class);
    LOGGER.info("UpdateStatisticsTool running for: " + tableName
            + " on snapshot: " + snapshotName + " with restore dir: " + restoreDir);
}

Example #26

Source File: PrepareClusterJob.java From recsys-offline with Apache License 2.0

5 votes

public void run() {
	
	try {
		Job job = Job.getInstance(HBaseContext.config, "ClusterPrepareJob");
		job.setJarByClass(PrepareClusterJob.class);

		Scan scan = new Scan();
		scan.setCaching(500);
		scan.setCacheBlocks(false);
		scan.addColumn(Constants.hbase_column_family.getBytes(),
				Constants.hbase_column_yearrate.getBytes());
		scan.addColumn(Constants.hbase_column_family.getBytes(),
				Constants.hbase_column_repaylimittime.getBytes());
		scan.addColumn(Constants.hbase_column_family.getBytes(),
				Constants.hbase_column_progress.getBytes());

		Filter filter = new SingleColumnValueFilter(Bytes.toBytes(Constants.hbase_column_family), 
				Bytes.toBytes(Constants.hbase_column_progress), CompareOp.NOT_EQUAL, Bytes.toBytes("100"));

		scan.setFilter(filter);

		TableMapReduceUtil.initTableMapperJob(Constants.hbase_p2p_table,
				scan, HBaseReadMapper.class, Text.class, Text.class, job);
		TableMapReduceUtil.initTableReducerJob(
				Constants.hbase_cluster_model_table,
				HBaseWriteReducer.class, job);
		job.setNumReduceTasks(1);

		boolean b = job.waitForCompletion(true);
		if (!b) {
			throw new IOException("error with job!");
		}
	} catch (Exception e) {
		e.printStackTrace();
	}
}

Example #27

Source File: IntegrationTestBigLinkedList.java From hbase with Apache License 2.0

5 votes

public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMultiplier, Integer numWalkers)
    throws Exception {
  LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
      + ", numNodes=" + numNodes);
  Job job = Job.getInstance(getConf());

  job.setJobName("Random Input Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  job.setInputFormatClass(GeneratorInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);

  job.setMapperClass(Mapper.class); //identity mapper

  FileOutputFormat.setOutputPath(job, tmpOutput);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), Random64.class);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}

Example #28

Source File: IntegrationTestBigLinkedListWithVisibility.java From hbase with Apache License 2.0

5 votes

private int doVerify(Path outputDir, int numReducers) throws IOException, InterruptedException,
    ClassNotFoundException {
  job = new Job(getConf());

  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());

  setJobScannerConf(job);

  Scan scan = new Scan();
  scan.addColumn(FAMILY_NAME, COLUMN_PREV);
  scan.setCaching(10000);
  scan.setCacheBlocks(false);
  String[] split = labels.split(COMMA);

  scan.setAuthorizations(new Authorizations(split[this.labelIndex * 2],
      split[(this.labelIndex * 2) + 1]));

  TableMapReduceUtil.initTableMapperJob(tableName.getName(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);

  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);
  boolean success = job.waitForCompletion(true);

  return success ? 0 : 1;
}

Example #29

Source File: CompactionTool.java From hbase with Apache License 2.0

5 votes

/**
 * Execute compaction, using a Map-Reduce job.
 */
private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
    final boolean compactOnce, final boolean major) throws Exception {
  Configuration conf = getConf();
  conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);
  conf.setBoolean(CONF_COMPACT_MAJOR, major);

  Job job = new Job(conf);
  job.setJobName("CompactionTool");
  job.setJarByClass(CompactionTool.class);
  job.setMapperClass(CompactionMapper.class);
  job.setInputFormatClass(CompactionInputFormat.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapSpeculativeExecution(false);
  job.setNumReduceTasks(0);

  // add dependencies (including HBase ones)
  TableMapReduceUtil.addDependencyJars(job);

  Path stagingDir = JobUtil.getQualifiedStagingDir(conf);
  FileSystem stagingFs = stagingDir.getFileSystem(conf);
  try {
    // Create input file with the store dirs
    Path inputPath = new Path(stagingDir, "compact-"+ EnvironmentEdgeManager.currentTime());
    List<Path> storeDirs = CompactionInputFormat.createInputFile(fs, stagingFs,
        inputPath, toCompactDirs);
    CompactionInputFormat.addInputPath(job, inputPath);

    // Initialize credential for secure cluster
    TableMapReduceUtil.initCredentials(job);
    // Despite the method name this will get delegation token for the filesystem
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      storeDirs.toArray(new Path[0]), conf);

    // Start the MR Job and wait
    return job.waitForCompletion(true) ? 0 : 1;
  } finally {
    fs.delete(stagingDir, true);
  }
}

Example #30

Source File: HBaseViewKeyInputFormat.java From kite with Apache License 2.0

5 votes

private TableInputFormat getDelegate(Configuration conf) throws IOException {
  TableInputFormat delegate = new TableInputFormat();
  String tableName = HBaseMetadataProvider.getTableName(dataset.getName());
  conf.set(TableInputFormat.INPUT_TABLE, tableName);
  if (view != null) {
    Job tempJob = new Job();
    Scan scan = ((BaseEntityScanner) view.newEntityScanner()).getScan();
    TableMapReduceUtil.initTableMapperJob(tableName, scan, TableMapper.class, null,
        null, tempJob);
    Configuration tempConf = Hadoop.JobContext.getConfiguration.invoke(tempJob);
    conf.set(SCAN, tempConf.get(SCAN));
  }
  delegate.setConf(conf);
  return delegate;
}