Java Code Examples for org.apache.hadoop.hbase.HBaseConfiguration#merge()

The following examples show how to use org.apache.hadoop.hbase.HBaseConfiguration#merge(). Each example lists the source file and the project it was taken from.
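Before the project examples, here is a minimal sketch of the call itself: HBaseConfiguration.merge(destConf, srcConf) copies every entry of the source configuration into the destination, overwriting keys that already exist there. The property key set below is purely illustrative and not taken from any of the projects that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class HBaseConfigurationMergeSketch {
  public static void main(String[] args) {
    // Destination: an existing application or job configuration.
    Configuration jobConf = new Configuration();
    jobConf.set("example.app.setting", "value"); // illustrative key only

    // Source: a configuration with the HBase resources (hbase-default.xml,
    // hbase-site.xml) loaded, plus the job settings layered on top.
    Configuration hbaseConf = HBaseConfiguration.create(jobConf);

    // Copy every entry of the source into the destination; keys that already
    // exist in the destination are overwritten by the source values.
    HBaseConfiguration.merge(jobConf, hbaseConf);

    System.out.println(jobConf.get("hbase.zookeeper.quorum"));
  }
}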
Example 1
Source File: TableMapReduceUtil.java    From hbase with Apache License 2.0
/**
 * Sets up the job for reading from one or more table snapshots, with one or more scans
 * per snapshot.
 * It bypasses HBase servers and reads directly from snapshot files.
 *
 * @param snapshotScans     map of snapshot name to scans on that snapshot.
 * @param mapper            The mapper class to use.
 * @param outputKeyClass    The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job               The current job to adjust.  Make sure the passed job is
 *                          carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *                          job classes via the distributed cache (tmpjars).
 */
public static void initMultiTableSnapshotMapperJob(Map<String, Collection<Scan>> snapshotScans,
    Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    Job job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
  MultiTableSnapshotInputFormat.setInput(job.getConfiguration(), snapshotScans, tmpRestoreDir);

  job.setInputFormatClass(MultiTableSnapshotInputFormat.class);
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (outputKeyClass != null) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  job.setMapperClass(mapper);
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));

  if (addDependencyJars) {
    addDependencyJars(job);
    addDependencyJarsForClasses(job.getConfiguration(), MetricRegistry.class);
  }

  resetCacheConfig(job.getConfiguration());
}
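The idiom HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf)) used above recurs in most of the examples below: create(conf) builds a fresh configuration with the HBase resources loaded and the job's existing settings layered on top, and merging that result back into the job configuration fills in any missing HBase defaults while preserving the job's own values.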
 
Example 2
Source File: IndexScrutinyTool.java    From phoenix with Apache License 2.0
private Job configureSubmittableJob(Job job, Path outputPath, Class<IndexScrutinyMapperForTest> mapperClass) throws Exception {
    Configuration conf = job.getConfiguration();
    conf.setBoolean("mapreduce.job.user.classpath.first", true);
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setJarByClass(IndexScrutinyTool.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (outputInvalidRows && OutputFormat.FILE.equals(outputFormat)) {
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, outputPath);
    }
    job.setMapperClass((mapperClass == null ? IndexScrutinyMapper.class : mapperClass));
    job.setNumReduceTasks(0);
    // Set the Output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    TableMapReduceUtil.addDependencyJars(job);
    return job;
}
 
Example 3
Source File: IndexTool.java    From phoenix with Apache License 2.0
private Job configureSubmittableJobUsingDirectApi(Job job) throws Exception {
    job.setReducerClass(PhoenixIndexImportDirectReducer.class);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    // Set the Physical Table name for use in DirectHTableWriter#write(Mutation)
    conf.set(TableOutputFormat.OUTPUT_TABLE,
        PhoenixConfigurationUtil.getPhysicalTableName(job.getConfiguration()));
    //Set the Output classes
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    TableMapReduceUtil.addDependencyJars(job);
    job.setNumReduceTasks(1);
    return job;
}
 
Example 4
Source File: IndexTool.java    From hgraphdb with Apache License 2.0
/**
 * Uses the HBase front door API to write to the index table. Submits the job and either returns or
 * waits for the job completion based on runForeground parameter.
 * 
 * @param job job
 * @param outputPath output path
 * @param runForeground - if true, waits for job completion, else submits and returns
 *            immediately.
 * @throws Exception
 */
private void configureSubmittableJobUsingDirectApi(Job job, Path outputPath, TableName outputTableName,
                                                   boolean skipDependencyJars, boolean runForeground)
        throws Exception {
    job.setMapperClass(getDirectMapperClass());
    job.setReducerClass(getDirectReducerClass());
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableOutputFormat.OUTPUT_TABLE, outputTableName.getNameAsString());

    //Set the Output classes
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    if (!skipDependencyJars) {
        TableMapReduceUtil.addDependencyJars(job);
    }
    job.setNumReduceTasks(1);

    if (!runForeground) {
        LOG.info("Running Index Build in Background - Submit async and exit");
        job.submit();
        return;
    }
    LOG.info("Running Index Build in Foreground. Waits for the build to complete. This may take a long time!.");
    boolean result = job.waitForCompletion(true);
    if (!result) {
        LOG.error("IndexTool job failed!");
        throw new Exception("IndexTool job failed: " + job.toString());
    }
    FileSystem.get(conf).delete(outputPath, true);
}
 
Example 5
Source File: TableMapReduceUtil.java    From hbase with Apache License 2.0
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table  The table name to read from.
 * @param scan  The scan instance with the columns, time range etc.
 * @param mapper  The mapper class to use.
 * @param outputKeyClass  The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job  The current job to adjust.  Make sure the passed job is
 * carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *           job classes via the distributed cache (tmpjars).
 * @param initCredentials whether to initialize hbase auth credentials for the job
 * @param inputFormatClass the input format
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(String table, Scan scan,
    Class<? extends TableMapper> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job,
    boolean addDependencyJars, boolean initCredentials,
    Class<? extends InputFormat> inputFormatClass)
throws IOException {
  job.setInputFormatClass(inputFormatClass);
  if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
  if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
  job.setMapperClass(mapper);
  if (Put.class.equals(outputValueClass)) {
    job.setCombinerClass(PutCombiner.class);
  }
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  conf.set(TableInputFormat.INPUT_TABLE, table);
  conf.set(TableInputFormat.SCAN, convertScanToString(scan));
  conf.setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      CellSerialization.class.getName());
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  if (initCredentials) {
    initCredentials(job);
  }
}
 
Example 6
Source File: TableMapReduceUtil.java    From hbase with Apache License 2.0
/**
 * Use this before submitting a Multi TableMap job. It will appropriately set
 * up the job.
 *
 * @param scans The list of {@link Scan} objects to read from.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is carrying
 *          all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the
 *          configured job classes via the distributed cache (tmpjars).
 * @param initCredentials whether to initialize hbase auth credentials for the job
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(List<Scan> scans,
    Class<? extends TableMapper> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job,
    boolean addDependencyJars,
    boolean initCredentials) throws IOException {
  job.setInputFormatClass(MultiTableInputFormat.class);
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (outputKeyClass != null) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  job.setMapperClass(mapper);
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  List<String> scanStrings = new ArrayList<>();

  for (Scan scan : scans) {
    scanStrings.add(convertScanToString(scan));
  }
  job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
    scanStrings.toArray(new String[scanStrings.size()]));

  if (addDependencyJars) {
    addDependencyJars(job);
  }

  if (initCredentials) {
    initCredentials(job);
  }
}
 
Example 7
Source File: TableMapReduceUtil.java    From hbase with Apache License 2.0
/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The output table.
 * @param reducer  The reducer class to use.
 * @param job  The current job to adjust.  Make sure the passed job is
 * carrying all necessary HBase configuration.
 * @param partitioner  Partitioner to use. Pass <code>null</code> to use
 * default partitioner.
 * @param quorumAddress Distant cluster to write to; default is null for
 * output to the cluster that is designated in <code>hbase-site.xml</code>.
 * Set this String to the zookeeper ensemble of an alternate remote cluster
 * when you would have the reduce write a cluster that is other than the
 * default; e.g. copying tables between clusters, the source would be
 * designated by <code>hbase-site.xml</code> and this param would have the
 * ensemble address of the remote cluster.  The format to pass is particular.
 * Pass <code> &lt;hbase.zookeeper.quorum&gt;:&lt;
 *             hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;
 * </code> such as <code>server,server2,server3:2181:/hbase</code>.
 * @param serverClass redefined hbase.regionserver.class
 * @param serverImpl redefined hbase.regionserver.impl
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *           job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 */
public static void initTableReducerJob(String table,
  Class<? extends TableReducer> reducer, Job job,
  Class partitioner, String quorumAddress, String serverClass,
  String serverImpl, boolean addDependencyJars) throws IOException {

  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  job.setOutputFormatClass(TableOutputFormat.class);
  if (reducer != null) job.setReducerClass(reducer);
  conf.set(TableOutputFormat.OUTPUT_TABLE, table);
  conf.setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
  // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
  if (quorumAddress != null) {
    // Calling this will validate the format
    ZKConfig.validateClusterKey(quorumAddress);
    conf.set(TableOutputFormat.QUORUM_ADDRESS,quorumAddress);
  }
  if (serverClass != null && serverImpl != null) {
    conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
    conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
  }
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Writable.class);
  if (partitioner == HRegionPartitioner.class) {
    job.setPartitionerClass(HRegionPartitioner.class);
    int regions = MetaTableAccessor.getRegionCount(conf, TableName.valueOf(table));
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  } else if (partitioner != null) {
    job.setPartitionerClass(partitioner);
  }

  if (addDependencyJars) {
    addDependencyJars(job);
  }

  initCredentials(job);
}
 
Example 8
Source File: TestVerifyReplicationCrossDiffHdfs.java    From hbase with Apache License 2.0
@Test
public void testVerifyRepBySnapshot() throws Exception {
  Path rootDir = CommonFSUtils.getRootDir(conf1);
  FileSystem fs = rootDir.getFileSystem(conf1);
  String sourceSnapshotName = "sourceSnapshot-" + System.currentTimeMillis();
  SnapshotTestingUtils.createSnapshotAndValidate(util1.getAdmin(), TABLE_NAME,
      Bytes.toString(FAMILY), sourceSnapshotName, rootDir, fs, true);

  // Take target snapshot
  Path peerRootDir = CommonFSUtils.getRootDir(conf2);
  FileSystem peerFs = peerRootDir.getFileSystem(conf2);
  String peerSnapshotName = "peerSnapshot-" + System.currentTimeMillis();
  SnapshotTestingUtils.createSnapshotAndValidate(util2.getAdmin(), TABLE_NAME,
      Bytes.toString(FAMILY), peerSnapshotName, peerRootDir, peerFs, true);

  String peerFSAddress = peerFs.getUri().toString();
  String temPath1 = new Path(fs.getUri().toString(), "/tmp1").toString();
  String temPath2 = "/tmp2";

  String[] args = new String[] { "--sourceSnapshotName=" + sourceSnapshotName,
    "--sourceSnapshotTmpDir=" + temPath1, "--peerSnapshotName=" + peerSnapshotName,
    "--peerSnapshotTmpDir=" + temPath2, "--peerFSAddress=" + peerFSAddress,
    "--peerHBaseRootAddress=" + CommonFSUtils.getRootDir(conf2), PEER_ID, TABLE_NAME.toString() };

  // Use the YARN config to override the source cluster's config.
  Configuration newConf = HBaseConfiguration.create(conf1);
  HBaseConfiguration.merge(newConf, mapReduceUtil.getConfiguration());
  newConf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/1");
  CommonFSUtils.setRootDir(newConf, CommonFSUtils.getRootDir(conf1));
  Job job = new VerifyReplication().createSubmittableJob(newConf, args);
  if (job == null) {
    fail("Job wasn't created, see the log");
  }
  if (!job.waitForCompletion(true)) {
    fail("Job failed, see the log");
  }
  assertEquals(10,
    job.getCounters().findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
  assertEquals(0,
    job.getCounters().findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
}
 
Example 9
Source File: SpliceConfiguration.java    From spliceengine with GNU Affero General Public License v3.0
public static Configuration create(Configuration other){
	Configuration conf = create();
	HBaseConfiguration.merge(conf, other);
	return conf;
}