org.apache.hadoop.hbase.mapreduce.KeyValueSerialization Java Examples

The following examples show how to use org.apache.hadoop.hbase.mapreduce.KeyValueSerialization. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HFileOutputFormat3.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator,
        Class<? extends OutputFormat<?, ?>> cls) throws IOException, UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(cls);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + tableDescriptor.getTableName());
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
}
 
Example #2
Source File: HFileOutputFormat3.java    From kylin with Apache License 2.0 4 votes vote down vote up
static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator,
        Class<? extends OutputFormat<?, ?>> cls) throws IOException, UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(cls);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
        // record this table name for creating writer by favored nodes
        LOG.info("bulkload locality sensitive enabled");
        conf.set(OUTPUT_TABLE_NAME_CONF_KEY, regionLocator.getName().getNameAsString());
    }
    
    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + tableDescriptor.getTableName());
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
}
 
Example #3
Source File: MultiHfileOutputFormat.java    From phoenix with Apache License 2.0 4 votes vote down vote up
/**
 * Configures the job for MultiHfileOutputFormat.
 * @param job
 * @param tablesToBeLoaded
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public static void configureIncrementalLoad(Job job, List<TargetTableRef> tablesToBeLoaded) throws IOException {
    
    Configuration conf = job.getConfiguration();
    job.setOutputFormatClass(MultiHfileOutputFormat.class);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
            MutationSerialization.class.getName(), ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());

    // tableStartKeys for all tables.
    Set<TableRowkeyPair> tablesStartKeys = Sets.newTreeSet();
    for(TargetTableRef table : tablesToBeLoaded) {
       final String tableName = table.getPhysicalName();
       try(Connection hbaseConn = ConnectionFactory.createConnection(conf);){
            Set<TableRowkeyPair> startKeys =
                    getRegionStartKeys(tableName,
                        hbaseConn.getRegionLocator(TableName.valueOf(tableName)));
           tablesStartKeys.addAll(startKeys);
           TableDescriptor tableDescriptor = hbaseConn.getTable(TableName.valueOf(tableName)).getDescriptor();
           String compressionConfig = configureCompression(tableDescriptor);
           String bloomTypeConfig = configureBloomType(tableDescriptor);
           String blockSizeConfig = configureBlockSize(tableDescriptor);
           String blockEncodingConfig = configureDataBlockEncoding(tableDescriptor);
           Map<String,String> tableConfigs = Maps.newHashMap();
           if(StringUtils.isNotBlank(compressionConfig)) {
               tableConfigs.put(COMPRESSION_FAMILIES_CONF_KEY, compressionConfig);
           }
           if(StringUtils.isNotBlank(bloomTypeConfig)) {
               tableConfigs.put(BLOOM_TYPE_FAMILIES_CONF_KEY,bloomTypeConfig);
           }
           if(StringUtils.isNotBlank(blockSizeConfig)) {
               tableConfigs.put(BLOCK_SIZE_FAMILIES_CONF_KEY,blockSizeConfig);
           }
           if(StringUtils.isNotBlank(blockEncodingConfig)) {
               tableConfigs.put(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,blockEncodingConfig);
           }
           table.setConfiguration(tableConfigs);
           final String tableDefns = TargetTableRefFunctions.TO_JSON.apply(table);
           // set the table definition in the config to be used during the RecordWriter..
           conf.set(tableName, tableDefns);
           
           TargetTableRef tbl = TargetTableRefFunctions.FROM_JSON.apply(tableDefns);
           LOGGER.info(" the table logical name is "+ tbl.getLogicalName());
       }
   }

   LOGGER.info("Configuring " + tablesStartKeys.size() + " reduce partitions to match current region count");
   job.setNumReduceTasks(tablesStartKeys.size());

   configurePartitioner(job, tablesStartKeys);
   TableMapReduceUtil.addDependencyJars(job);
   TableMapReduceUtil.initCredentials(job);
    
}