org.apache.hadoop.hbase.mapreduce.TableOutputFormat Java Examples

The following examples show how to use org.apache.hadoop.hbase.mapreduce.TableOutputFormat. They are drawn from several open-source projects; the originating project and source file are noted above each example.
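Before the project-specific examples, a minimal sketch of the pattern they all share may help: the target table name goes into the job configuration under TableOutputFormat.OUTPUT_TABLE, TableOutputFormat becomes the job's output format, and the job typically emits (ImmutableBytesWritable, Mutation) pairs. The table name "my_table" and the job name below are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.mapreduce.Job;

public class TableOutputFormatSetup {
    public static Job configure() throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Tell TableOutputFormat which HBase table to write to.
        conf.set(TableOutputFormat.OUTPUT_TABLE, "my_table");
        Job job = Job.getInstance(conf, "write-to-hbase");
        job.setOutputFormatClass(TableOutputFormat.class);
        // TableOutputFormat consumes (row key, Mutation) pairs.
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Mutation.class);
        return job;
    }
}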
Example #1
Source File: HBaseStorage.java    From spork with Apache License 2.0
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    if (location.startsWith("hbase://")){
        job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, location.substring(8));
    }else{
        job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, location);
    }

    String serializedSchema = getUDFProperties().getProperty(contextSignature + "_schema");
    if (serializedSchema != null) {
        schema_ = (ResourceSchema) ObjectSerializer.deserialize(serializedSchema);
    }

    m_conf = initializeLocalJobConfig(job);
    // Not setting a udf property and getting the hbase delegation token
    // only once like in setLocation as setStoreLocation gets different Job
    // objects for each call and the last Job passed is the one that is
    // launched. So we end up getting multiple hbase delegation tokens.
    addHBaseDelegationToken(m_conf, job);
}
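The comment above concerns HBase delegation tokens on secure clusters; addHBaseDelegationToken is HBaseStorage's own helper and is not shown here. For a generic MapReduce job, a comparable effect can be had with TableMapReduceUtil, as in this hedged sketch (not the Pig code):

import java.io.IOException;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;

// Sketch: on a Kerberos-secured cluster, request HBase delegation tokens for
// the job before submission so TableOutputFormat can authenticate at task time.
public class SecureJobHelper {
    public static void attachHBaseCredentials(Job job) throws IOException {
        TableMapReduceUtil.initCredentials(job);
    }
}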
 
Example #2
Source File: UpdateCFJob.java    From recsys-offline with Apache License 2.0
public void run() throws Exception{
	long startTime = System.currentTimeMillis();
	Configuration conf = new Configuration();
	conf.set(TableOutputFormat.OUTPUT_TABLE, Constants.hbase_user_item_pref_table);
	Job job = Job.getInstance(conf, "hbasewriter"+System.currentTimeMillis());
	job.setJarByClass(UpdateCFJob.class);
	job.setMapperClass(TokenizerMapper.class);
	job.setReducerClass(HBaseWriteReducer.class);
	job.setMapOutputKeyClass(IntWritable.class);  
	job.setMapOutputValueClass(Text.class);
	job.setOutputFormatClass(TableOutputFormat.class);
	FileInputFormat.addInputPath(job, new Path(input));
	boolean isFinish = job.waitForCompletion(true);
	// Measure the end time after the job has actually completed.
	long endTime = System.currentTimeMillis();
	if(isFinish){
		logger.info("UpdateCFJob job ["+job.getJobName()+"] finished. It took "+ (endTime - startTime) / 1000 +"s.");
	} else {
		logger.error("UpdateCFJob job ["+job.getJobName()+"] failed.");
	}
}
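HBaseWriteReducer itself is not reproduced on this page. A reducer feeding TableOutputFormat would, in outline, extend TableReducer and emit Put mutations; the sketch below is hypothetical, and the column family, qualifiers, and value layout are invented for illustration.

import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

// Hypothetical reducer shape for the job above: (IntWritable, Text) pairs arrive
// from the mapper, and each output value is a Put that TableOutputFormat writes
// to the table named under TableOutputFormat.OUTPUT_TABLE.
public class ExampleHBaseWriteReducer
        extends TableReducer<IntWritable, Text, ImmutableBytesWritable> {
    @Override
    protected void reduce(IntWritable userId, Iterable<Text> prefs, Context context)
            throws IOException, InterruptedException {
        Put put = new Put(Bytes.toBytes(userId.get()));
        int i = 0;
        for (Text pref : prefs) {
            // One qualifier per preference; names are placeholders.
            put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("pref" + i++), Bytes.toBytes(pref.toString()));
        }
        context.write(new ImmutableBytesWritable(put.getRow()), put);
    }
}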
 
Example #3
Source File: IndexTool.java    From phoenix with Apache License 2.0
private Job configureSubmittableJobUsingDirectApi(Job job) throws Exception {
    job.setReducerClass(PhoenixIndexImportDirectReducer.class);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    // Set the Physical Table name for use in DirectHTableWriter#write(Mutation)
    conf.set(TableOutputFormat.OUTPUT_TABLE,
        PhoenixConfigurationUtil.getPhysicalTableName(job.getConfiguration()));
    //Set the Output classes
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    TableMapReduceUtil.addDependencyJars(job);
    job.setNumReduceTasks(1);
    return job;
}
 
Example #4
Source File: DirectHTableWriter.java    From phoenix with Apache License 2.0
protected void setConf(Configuration otherConf) {
    this.conf = HBaseConfiguration.create(otherConf);

    String tableName = this.conf.get(TableOutputFormat.OUTPUT_TABLE);
    if (tableName == null || tableName.length() <= 0) {
        throw new IllegalArgumentException("Must specify table name");
    }

    try {
        this.conn = ConnectionFactory.createConnection(this.conf);
        this.table = conn.getTable(TableName.valueOf(tableName));
        LOGGER.info("Created table instance for " + tableName);
    } catch (IOException e) {
        LOGGER.error("IOException : ", e);
        tryClosingResourceSilently(this.conn);
        throw new RuntimeException(e);
    }
}
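Once the Connection and Table exist, the writer's data path is direct puts against the table rather than going through an output format. The sketch below illustrates that write/close pairing under that assumption; it is not Phoenix's exact DirectHTableWriter API.

import java.io.IOException;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;

// Illustrative write/close pair for a direct table writer built on the
// Connection/Table created in setConf() above.
class DirectWriteSketch {
    private final Connection conn;
    private final Table table;

    DirectWriteSketch(Connection conn, Table table) {
        this.conn = conn;
        this.table = table;
    }

    void write(Mutation mutation) throws IOException {
        if (mutation instanceof Put) {
            table.put((Put) mutation); // direct write, bypassing the MapReduce output path
        }
    }

    void close() throws IOException {
        table.close();
        conn.close();
    }
}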
 
Example #5
Source File: IndexTool.java    From hgraphdb with Apache License 2.0
/**
 * Uses the HBase front door API to write to the index table. Submits the job and either returns or
 * waits for the job completion based on runForeground parameter.
 * 
 * @param job job
 * @param outputPath output path; deleted after a successful foreground run
 * @param outputTableName name of the index table the job writes to
 * @param skipDependencyJars if true, do not ship dependency jars with the job
 * @param runForeground - if true, waits for job completion, else submits and returns
 *            immediately.
 * @throws Exception
 */
private void configureSubmittableJobUsingDirectApi(Job job, Path outputPath, TableName outputTableName,
                                                   boolean skipDependencyJars, boolean runForeground)
        throws Exception {
    job.setMapperClass(getDirectMapperClass());
    job.setReducerClass(getDirectReducerClass());
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableOutputFormat.OUTPUT_TABLE, outputTableName.getNameAsString());

    //Set the Output classes
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    if (!skipDependencyJars) {
        TableMapReduceUtil.addDependencyJars(job);
    }
    job.setNumReduceTasks(1);

    if (!runForeground) {
        LOG.info("Running Index Build in Background - Submit async and exit");
        job.submit();
        return;
    }
    LOG.info("Running Index Build in Foreground. Waits for the build to complete. This may take a long time!.");
    boolean result = job.waitForCompletion(true);
    if (!result) {
        LOG.error("IndexTool job failed!");
        throw new Exception("IndexTool job failed: " + job.toString());
    }
    FileSystem.get(conf).delete(outputPath, true);
}
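When the job is submitted in the background with job.submit(), the caller is responsible for tracking it. A minimal sketch of polling for completion (the polling interval is arbitrary):

import org.apache.hadoop.mapreduce.Job;

// Sketch: after job.submit(), poll until the job finishes and report the outcome.
public class JobWatcher {
    public static boolean waitQuietly(Job job) throws Exception {
        while (!job.isComplete()) {
            Thread.sleep(5000L); // arbitrary polling interval
        }
        return job.isSuccessful();
    }
}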
 
Example #6
Source File: HBaseStorage.java    From spork with Apache License 2.0
@Override
public OutputFormat getOutputFormat() throws IOException {
    if (outputFormat == null) {
        if (m_conf == null) {
            throw new IllegalStateException("setStoreLocation has not been called");
        } else {
            this.outputFormat = new TableOutputFormat();
            this.outputFormat.setConf(m_conf);
        }
    }
    return outputFormat;
}
 
Example #7
Source File: PhoenixMRJobCallable.java    From phoenix with Apache License 2.0
@Override
public Boolean call() throws Exception {
    StringBuilder commandLineArgBuilder = new StringBuilder();
    commandLineArgBuilder.append(" -dt " + indexInfo.getDataTableName());
    commandLineArgBuilder.append(" -it " + indexInfo.getTableName());
    commandLineArgBuilder.append(" -direct");
    commandLineArgBuilder.append(" -op " + (basePath.endsWith("/") ? basePath : basePath + "/")
            + indexInfo.getTableName());

    if (indexInfo.getTableSchem() != null && indexInfo.getTableSchem().trim().length() > 0) {
        commandLineArgBuilder.append(" -s " + indexInfo.getTableSchem());
    }
    // Setting the configuration here again (in addition to IndexTool.java) to make
    // doubly sure the configurations are set
    final String qDataTable =
            SchemaUtil.getTableName(indexInfo.getTableSchem(), indexInfo.getDataTableName());
    final String qIndexTable =
            SchemaUtil.getTableName(indexInfo.getTableSchem(), indexInfo.getTableName());
    String physicalIndexTable = qIndexTable;

    if (IndexType.LOCAL.equals(indexInfo.getIndexType())) {
        physicalIndexTable = MetaDataUtil.getLocalIndexTableName(qDataTable);
    }
    conf.set(TableOutputFormat.OUTPUT_TABLE, physicalIndexTable);

    IndexTool tool = new IndexTool();
    tool.setConf(conf);
    int result = tool.run(commandLineArgBuilder.toString().split(" "));
    return result == 0;
}
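The call to tool.run(...) above drives the Tool directly with a pre-built Configuration. The more common standalone pattern goes through ToolRunner, roughly as sketched here; the -dt/-it/-op values are placeholders and the IndexTool import path is assumed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.ToolRunner;
import org.apache.phoenix.mapreduce.index.IndexTool; // import path assumed

// Sketch: run IndexTool through ToolRunner so generic Hadoop options are parsed.
public class RunIndexToolSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        int exitCode = ToolRunner.run(conf, new IndexTool(),
                new String[] { "-dt", "MY_DATA_TABLE", "-it", "MY_INDEX", "-direct", "-op", "/tmp/index-out" });
        System.exit(exitCode);
    }
}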
 
Example #8
Source File: LoadMapper.java    From hbase-secondary-index with GNU General Public License v3.0
@Override
protected void setup(Context context) throws IOException,
		InterruptedException {
	config = context.getConfiguration();
	table = new HTable(config, Bytes.toBytes(config
			.get(TableOutputFormat.OUTPUT_TABLE)));
}
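HTable(Configuration, byte[]) is the older client API. On HBase 1.x and later, an equivalent setup would resolve the output table name from the configuration and open it through a Connection, roughly as below (a sketch, not the project's code; the caller must close the Connection).

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;

// Sketch of the same setup against the newer client API.
class NewApiSetup {
    static Table openOutputTable(Configuration config) throws IOException {
        Connection connection = ConnectionFactory.createConnection(config);
        // Note: keep a reference to the Connection and close it in cleanup().
        return connection.getTable(TableName.valueOf(config.get(TableOutputFormat.OUTPUT_TABLE)));
    }
}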
 
Example #9
Source File: HBaseWriteExample.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		// set up the execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// get input data
		DataSet<String> text = getTextDataSet(env);

		DataSet<Tuple2<String, Integer>> counts =
				// split up the lines in pairs (2-tuples) containing: (word, 1)
				text.flatMap(new Tokenizer())
				// group by the tuple field "0" and sum up tuple field "1"
				.groupBy(0)
				.sum(1);

		// emit result
		Job job = Job.getInstance();
		job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, outputTableName);
		// TODO is "mapred.output.dir" really useful?
		job.getConfiguration().set("mapred.output.dir", HBaseFlinkTestConstants.TMP_DIR);
		counts.map(new RichMapFunction <Tuple2<String, Integer>, Tuple2<Text, Mutation>>() {
			private transient Tuple2<Text, Mutation> reuse;

			@Override
			public void open(Configuration parameters) throws Exception {
				super.open(parameters);
				reuse = new Tuple2<Text, Mutation>();
			}

			@Override
			public Tuple2<Text, Mutation> map(Tuple2<String, Integer> t) throws Exception {
				reuse.f0 = new Text(t.f0);
				Put put = new Put(t.f0.getBytes(ConfigConstants.DEFAULT_CHARSET));
				put.add(HBaseFlinkTestConstants.CF_SOME, HBaseFlinkTestConstants.Q_SOME, Bytes.toBytes(t.f1));
				reuse.f1 = put;
				return reuse;
			}
		}).output(new HadoopOutputFormat<Text, Mutation>(new TableOutputFormat<Text>(), job));

		// execute program
		env.execute("WordCount (HBase sink) Example");
	}
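The Tokenizer used in the flatMap above is not reproduced on this page. A plausible implementation, mirroring Flink's WordCount examples but hypothetical here, splits each line into lowercase words and emits (word, 1) pairs:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical Tokenizer matching the flatMap(new Tokenizer()) call above.
public final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        // normalize and split each line into words
        for (String token : value.toLowerCase().split("\\W+")) {
            if (token.length() > 0) {
                out.collect(new Tuple2<>(token, 1));
            }
        }
    }
}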
 
Example #10
Source File: HBaseWriteExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		// set up the execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// get input data
		DataSet<String> text = getTextDataSet(env);

		DataSet<Tuple2<String, Integer>> counts =
				// split up the lines in pairs (2-tuples) containing: (word, 1)
				text.flatMap(new Tokenizer())
				// group by the tuple field "0" and sum up tuple field "1"
				.groupBy(0)
				.sum(1);

		// emit result
		Job job = Job.getInstance();
		job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, outputTableName);
		// TODO is "mapred.output.dir" really useful?
		job.getConfiguration().set("mapred.output.dir", HBaseFlinkTestConstants.TMP_DIR);
		counts.map(new RichMapFunction <Tuple2<String, Integer>, Tuple2<Text, Mutation>>() {
			private transient Tuple2<Text, Mutation> reuse;

			@Override
			public void open(Configuration parameters) throws Exception {
				super.open(parameters);
				reuse = new Tuple2<Text, Mutation>();
			}

			@Override
			public Tuple2<Text, Mutation> map(Tuple2<String, Integer> t) throws Exception {
				reuse.f0 = new Text(t.f0);
				Put put = new Put(t.f0.getBytes(ConfigConstants.DEFAULT_CHARSET));
				put.add(HBaseFlinkTestConstants.CF_SOME, HBaseFlinkTestConstants.Q_SOME, Bytes.toBytes(t.f1));
				reuse.f1 = put;
				return reuse;
			}
		}).output(new HadoopOutputFormat<Text, Mutation>(new TableOutputFormat<Text>(), job));

		// execute program
		env.execute("WordCount (HBase sink) Example");
	}
 
Example #11
Source File: JobFileRawLoader.java    From hraven with Apache License 2.0
/**
 * @param myHBaseConf configuration used to create and run the job. Should be
 *          an HBase configuration.
 * @param input path to the processFile
 * @param totalJobCount the total number of jobs that need to be run in this
 *          batch. Used in the job name.
 * @return whether all job confs were loaded properly.
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private boolean runRawLoaderJob(Configuration myHBaseConf, String input,
    int totalJobCount)
    throws IOException, InterruptedException, ClassNotFoundException {
  boolean success;

  // Turn off speculative execution.
  // Note: must be BEFORE the job construction with the new mapreduce API.
  myHBaseConf.setBoolean("mapred.map.tasks.speculative.execution", false);

  // Set up job
  Job job = new Job(myHBaseConf, getJobName(totalJobCount));
  job.setJarByClass(JobFileRawLoader.class);

  Path inputPath = new Path(input);

  if (hdfs.exists(inputPath)) {

    // Set input
    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);

    job.setMapperClass(JobFileRawLoaderMapper.class);

    // Set the output format to push data into HBase.
    job.setOutputFormatClass(TableOutputFormat.class);
    TableMapReduceUtil.initTableReducerJob(Constants.HISTORY_RAW_TABLE, null,
        job);

    job.setOutputKeyClass(JobFileRawLoaderMapper.getOutputKeyClass());
    job.setOutputValueClass(JobFileRawLoaderMapper.getOutputValueClass());

    // This is a map-only class, skip reduce step
    job.setNumReduceTasks(0);

    // Run the job
    success = job.waitForCompletion(true);

    if (success) {
      success = hdfs.delete(inputPath, false);
    }

  } else {
    System.err.println("Unable to find processFile: " + inputPath);
    success = false;
  }
  return success;
}
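Because the job runs with zero reduce tasks, the mapper's output pairs go straight to TableOutputFormat. JobFileRawLoaderMapper is not shown here; the sketch below is a hypothetical shape for such a map-only mapper, with input types, column family, and qualifier invented for illustration.

import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical map-only mapper: each record becomes a Put that TableOutputFormat
// writes directly to the target table, since the job has no reduce phase.
public class RawFileMapperSketch
        extends Mapper<Text, BytesWritable, ImmutableBytesWritable, Put> {
    @Override
    protected void map(Text key, BytesWritable value, Context context)
            throws IOException, InterruptedException {
        byte[] row = Bytes.toBytes(key.toString());
        Put put = new Put(row);
        // Column family and qualifier are placeholders.
        put.addColumn(Bytes.toBytes("r"), Bytes.toBytes("raw"), value.copyBytes());
        context.write(new ImmutableBytesWritable(row), put);
    }
}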
 
Example #12
Source File: SpliceTableMapReduceUtil.java    From spliceengine with GNU Affero General Public License v3.0
/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The output Splice table name; the format should be Schema.tableName.
 * @param reducer  The reducer class to use.
 * @param job  The current job to adjust.  Make sure the passed job is
 * carrying all necessary configuration.
 * @param partitioner  Partitioner to use. Pass <code>null</code> to use
 * default partitioner.
 * @param quorumAddress Distant cluster to write to; default is null for
 * output to the cluster that is designated in <code>hbase-site.xml</code>.
 * Set this String to the zookeeper ensemble of an alternate remote cluster
 * when you would have the reduce write to a cluster other than the
 * default; e.g. copying tables between clusters, the source would be
 * designated by <code>hbase-site.xml</code> and this param would have the
 * ensemble address of the remote cluster.  The format to pass is particular.
 * Pass <code> &lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;
 * </code> such as <code>server,server2,server3:2181:/hbase</code>.
 * @param serverClass redefined hbase.regionserver.class
 * @param serverImpl redefined hbase.regionserver.client
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *           job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 * @throws SQLException
 */
public static void initTableReducerJob(String table,
                                       Class<? extends Reducer> reducer,Job job,
                                       Class partitioner,
                                       String quorumAddress,
                                       String serverClass,
                                       String serverImpl,boolean addDependencyJars,Class<? extends OutputFormat> outputformatClass) throws IOException{

    Configuration conf=job.getConfiguration();
    job.setOutputFormatClass(outputformatClass);
    if(reducer!=null) job.setReducerClass(reducer);
    conf.set(MRConstants.SPLICE_OUTPUT_TABLE_NAME,table);
    if(sqlUtil==null)
        sqlUtil=SMSQLUtil.getInstance(conf.get(MRConstants.SPLICE_JDBC_STR));
    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    String hbaseTableID=null;
    try{
        hbaseTableID=sqlUtil.getConglomID(table);
    }catch(SQLException e){
        // TODO Auto-generated catch block
        e.printStackTrace();
        throw new IOException(e);
    }
    conf.set(MRConstants.HBASE_OUTPUT_TABLE_NAME,table);

    if(quorumAddress!=null){
        // Calling this will validate the format
        ZKConfig.validateClusterKey(quorumAddress);
        conf.set(TableOutputFormat.QUORUM_ADDRESS,quorumAddress);
    }
    if(serverClass!=null && serverImpl!=null){
        conf.set(TableOutputFormat.REGION_SERVER_CLASS,serverClass);
        conf.set(TableOutputFormat.REGION_SERVER_IMPL,serverImpl);

    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Object.class);
    if(partitioner==HRegionPartitioner.class){
        job.setPartitionerClass(HRegionPartitioner.class);
        // TODO Where are the keys?
        int regions=getReduceNumberOfRegions(hbaseTableID);
        if(job.getNumReduceTasks()>regions){
            job.setNumReduceTasks(regions);
        }
    }else if(partitioner!=null){
        job.setPartitionerClass(partitioner);
    }

    if(addDependencyJars){
        addDependencyJars(job);
    }

    //initCredentials(job);
}
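A hedged example of calling this helper follows; the table name, quorum string, and reducer class are placeholders, and the SpliceTableMapReduceUtil import (project-specific package) is omitted.

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical caller of the full-argument overload shown above.
public class SpliceReducerJobSetup {
    public static Job build(Class<? extends Reducer> reducerClass) throws Exception {
        Job job = Job.getInstance(HBaseConfiguration.create(), "splice-table-reduce");
        SpliceTableMapReduceUtil.initTableReducerJob(
                "MYSCHEMA.MYTABLE",        // Splice table as Schema.tableName
                reducerClass,
                job,
                null,                      // default partitioner
                "zk1,zk2,zk3:2181:/hbase", // remote quorum, or null to use hbase-site.xml
                null, null,                // serverClass / serverImpl overrides
                true,                      // ship dependency jars via the distributed cache
                TableOutputFormat.class);
        return job;
    }
}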
 
Example #13
Source File: SpliceTableMapReduceUtil.java    From spliceengine with GNU Affero General Public License v3.0
/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The Splice output table.
 * @param reducer  The reducer class to use.
 * @param job  The current job to adjust.  Make sure the passed job is
 * carrying all necessary HBase configuration.
 * @param partitioner  Partitioner to use. Pass <code>null</code> to use
 * default partitioner.
 * @param quorumAddress Distant cluster to write to; default is null for
 * output to the cluster that is designated in <code>hbase-site.xml</code>.
 * Set this String to the zookeeper ensemble of an alternate remote cluster
 * when you would have the reduce write to a cluster other than the
 * default; e.g. copying tables between clusters, the source would be
 * designated by <code>hbase-site.xml</code> and this param would have the
 * ensemble address of the remote cluster.  The format to pass is particular.
 * Pass <code> &lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;
 * </code> such as <code>server,server2,server3:2181:/hbase</code>.
 * @param serverClass redefined hbase.regionserver.class
 * @param serverImpl redefined hbase.regionserver.client
 * @throws IOException When determining the region count fails.
 * @throws SQLException
 */
public static void initTableReducerJob(String table,
                                       Class<? extends Reducer> reducer,Job job,
                                       Class partitioner,String quorumAddress,String serverClass,
                                       String serverImpl) throws IOException, SQLException{
    initTableReducerJob(table,reducer,job,partitioner,quorumAddress,
            serverClass,serverImpl,true,TableOutputFormat.class);
}