Java Code Examples for org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#initCredentials()

The following examples show how to use org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#initCredentials(). The examples are taken from open-source projects; the original project and source file are noted above each example.
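Across the examples below the pattern is the same: once the Job has been configured, TableMapReduceUtil.initCredentials(job) is called before submission so that, on a Kerberos-secured cluster, the job carries an HBase delegation token for the submitting user; on an unsecured cluster the call is effectively a no-op. The following sketch is a minimal illustration of that pattern and is not taken from any of the projects listed; the table name "example_table" and the job wiring are assumptions for illustration only.

public static int runSecureScanJob(Configuration conf) throws Exception {
    Job job = Job.getInstance(conf, "TableMapReduceUtil.initCredentials example");

    // Scan a placeholder table with HBase's built-in IdentityTableMapper; no reduce phase.
    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob("example_table", scan,
            IdentityTableMapper.class, ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);

    // Ship the HBase client jars with the job and, when security is enabled,
    // obtain an HBase delegation token for the submitting user.
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);

    return job.waitForCompletion(true) ? 0 : 1;
}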
Example 1
Source File: HFileOutputFormat3.java    From kylin-on-parquet-v2 with Apache License 2.0
public static void configureIncrementalLoadMap(Job job, Table table) throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormat3.class);

    // Set compression algorithms based on column families
    configureCompression(conf, table.getTableDescriptor());
    configureBloomType(table.getTableDescriptor(), conf);
    configureBlockSize(table.getTableDescriptor(), conf);
    HTableDescriptor tableDescriptor = table.getTableDescriptor();
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + table.getName() + " output configured.");
}
 
Example 2
Source File: HFileOutputFormat3.java    From kylin with Apache License 2.0
public static void configureIncrementalLoadMap(Job job, Table table) throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormat3.class);

    // Set compression algorithms based on column families
    configureCompression(conf, table.getTableDescriptor());
    configureBloomType(table.getTableDescriptor(), conf);
    configureBlockSize(table.getTableDescriptor(), conf);
    HTableDescriptor tableDescriptor = table.getTableDescriptor();
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + table.getName() + " output configured.");
}
 
Example 3
Source File: PhoenixHBaseLoader.java    From phoenix with Apache License 2.0
@Override
public void setLocation(String location, Job job) throws IOException {        
    final Configuration configuration = job.getConfiguration();
    //explicitly turning off combining splits. 
    configuration.setBoolean("pig.noSplitCombination", true);
    //to have phoenix working on a secured cluster
    TableMapReduceUtil.initCredentials(job);
    this.initializePhoenixPigConfiguration(location, configuration);
}
 
Example 4
Source File: PerformanceEvaluation.java    From hbase with Apache License 2.0
/**
 * Run a mapreduce job.  Run as many maps as asked-for clients.
 * Before we start up the job, write out an input file with an instruction
 * per client regarding which row to start on.
 * @param cmd Command to run.
 */
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
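  // Ship HBase dependency jars with the job and obtain HBase delegation tokens when security is enabled.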
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
 
Example 5
Source File: IntegrationTestBigLinkedList.java    From hbase with Apache License 2.0
public int runGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMultiplier, Integer numWalkers)
    throws Exception {
  LOG.info("Running Generator with numMappers=" + numMappers +", numNodes=" + numNodes);
  createSchema();
  job = Job.getInstance(getConf());

  job.setJobName("Link Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  FileInputFormat.setInputPaths(job, tmpOutput);
  job.setInputFormatClass(OneFilePerMapperSFIF.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);

  setMapperForGenerator(job);

  job.setOutputFormatClass(NullOutputFormat.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
                                                 AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}
 
Example 6
Source File: PhoenixConfigurationUtil.java    From phoenix with Apache License 2.0
public static void loadHBaseConfiguration(Job job) throws IOException {
    // load hbase-site.xml
    Configuration hbaseConf = HBaseConfiguration.create();
    for (Map.Entry<String, String> entry : hbaseConf) {
        if (job.getConfiguration().get(entry.getKey()) == null) {
            job.getConfiguration().set(entry.getKey(), entry.getValue());
        }
    }
    //In order to have phoenix working on a secured cluster
    TableMapReduceUtil.initCredentials(job);
}
 
Example 7
Source File: HFileOutputFormat3.java    From kylin-on-parquet-v2 with Apache License 2.0
static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator,
        Class<? extends OutputFormat<?, ?>> cls) throws IOException, UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(cls);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + tableDescriptor.getTableName());
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
}
 
Example 8
Source File: UpdateStatisticsTool.java    From phoenix with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    parseArgs(args);
    preJobTask();
    configureJob();
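    // Acquire HBase delegation tokens so the MapReduce job can authenticate on a secure cluster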
    TableMapReduceUtil.initCredentials(job);
    int ret = runJob();
    postJobTask();
    return ret;
}
 
Example 9
Source File: HalyardBulkLoad.java    From Halyard with Apache License 2.0
@Override
protected int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String workdir = cmd.getOptionValue('w');
    String target = cmd.getOptionValue('t');
    getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
    getConf().setBoolean(VERIFY_DATATYPE_VALUES_PROPERTY, cmd.hasOption('d'));
    getConf().setBoolean(TRUNCATE_PROPERTY, cmd.hasOption('r'));
    getConf().setInt(SPLIT_BITS_PROPERTY, Integer.parseInt(cmd.getOptionValue('b', "3")));
    if (cmd.hasOption('g')) getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
    getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, Long.parseLong(cmd.getOptionValue('e', String.valueOf(System.currentTimeMillis()))));
    if (cmd.hasOption('m')) getConf().setLong("mapreduce.input.fileinputformat.split.maxsize", Long.parseLong(cmd.getOptionValue('m')));
    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + workdir + " -> " + target);
    job.setJarByClass(HalyardBulkLoad.class);
    job.setMapperClass(RDFMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), target, true, getConf().getInt(SPLIT_BITS_PROPERTY, 3))) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, source);
        FileOutputFormat.setOutputPath(job, new Path(workdir));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            if (getConf().getBoolean(TRUNCATE_PROPERTY, false)) {
                HalyardTableUtils.truncateTable(hTable).close();
            }
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(workdir), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}
 
Example 10
Source File: CsvBulkLoadTool.java    From phoenix with Apache License 2.0
@Override
public Boolean call() {
    LOG.info("Configuring HFile output path to {}", outputPath);
    try {
        Job job = new Job(conf, "Phoenix MapReduce import for " + tableName);

        // Allow overriding the job jar setting by using a -D system property at startup
        if (job.getJar() == null) {
            job.setJarByClass(CsvToKeyValueMapper.class);
        }
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setMapperClass(CsvToKeyValueMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);

        // Initialize credentials to possibly run in a secure environment
        TableMapReduceUtil.initCredentials(job);

        HTable htable = new HTable(conf, tableName);

        // Auto-configure partitioner and reducer according to the main data table
        HFileOutputFormat.configureIncrementalLoad(job, htable);

        LOG.info("Running MapReduce import job from {} to {}", inputPath, outputPath);
        boolean success = job.waitForCompletion(true);
        if (!success) {
            LOG.error("Import job failed, check JobTracker for details");
            htable.close();
            return false;
        }

        LOG.info("Loading HFiles from {}", outputPath);
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        loader.doBulkLoad(outputPath, htable);
        htable.close();

        LOG.info("Incremental load complete for table=" + tableName);

        LOG.info("Removing output directory {}", outputPath);
        if (!FileSystem.get(conf).delete(outputPath, true)) {
            LOG.error("Removing output directory {} failed", outputPath);
        }

        return true;
    } catch (Exception ex) {
        LOG.error("Import job on table=" + tableName + " failed due to exception:" + ex);
        return false;
    }
}
 
Example 11
Source File: HalyardPreSplit.java    From Halyard with Apache License 2.0
@Override
protected int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    try (Connection con = ConnectionFactory.createConnection(getConf())) {
        try (Admin admin = con.getAdmin()) {
            if (admin.tableExists(TableName.valueOf(target))) {
                LOG.log(Level.WARNING, "Pre-split cannot modify already existing table {0}", target);
                return -1;
            }
        }
    }
    getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
    if (cmd.hasOption('g')) getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
    getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    getConf().setInt(DECIMATION_FACTOR_PROPERTY, Integer.parseInt(cmd.getOptionValue('d', String.valueOf(DEFAULT_DECIMATION_FACTOR))));
    getConf().setLong(SPLIT_LIMIT_PROPERTY, Long.parseLong(cmd.getOptionValue('l', String.valueOf(DEFAULT_SPLIT_LIMIT))));
    Job job = Job.getInstance(getConf(), "HalyardPreSplit -> " + target);
    job.getConfiguration().set(TABLE_PROPERTY, target);
    job.setJarByClass(HalyardPreSplit.class);
    job.setMapperClass(RDFDecimatingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, source);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    job.setReducerClass(PreSplitReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("PreSplit Calculation Completed..");
        return 0;
    }
    return -1;
}
 
Example 12
Source File: HFileOutputFormat3.java    From kylin with Apache License 2.0
static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator,
        Class<? extends OutputFormat<?, ?>> cls) throws IOException, UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(cls);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
        // record this table name for creating writer by favored nodes
        LOG.info("bulkload locality sensitive enabled");
        conf.set(OUTPUT_TABLE_NAME_CONF_KEY, regionLocator.getName().getNameAsString());
    }
    
    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + tableDescriptor.getTableName());
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
}
 
Example 13
Source File: HalyardSummary.java    From Halyard with Apache License 2.0
@Override
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    TableMapReduceUtil.addDependencyJars(getConf(),
           HalyardExport.class,
           Rio.class,
           AbstractRDFHandler.class,
           RDFFormat.class,
           RDFParser.class,
           HTable.class,
           HBaseConfiguration.class,
           AuthenticationProtos.class,
           Trace.class,
           Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardSummary " + source + (target == null ? " update" : " -> " + target));
    job.getConfiguration().set(SOURCE, source);
    if (target != null) job.getConfiguration().set(TARGET, target);
    if (cmd.hasOption('g')) job.getConfiguration().set(TARGET_GRAPH, cmd.getOptionValue('g'));
    if (cmd.hasOption('d')) job.getConfiguration().setInt(DECIMATION_FACTOR, Integer.parseInt(cmd.getOptionValue('d')));
    job.setJarByClass(HalyardSummary.class);
    TableMapReduceUtil.initCredentials(job);

    Scan scan = HalyardTableUtils.scan(new byte[]{HalyardTableUtils.POS_PREFIX}, new byte[]{HalyardTableUtils.POS_PREFIX + 1});

    TableMapReduceUtil.initTableMapperJob(source,
            scan,
            SummaryMapper.class,
            ImmutableBytesWritable.class,
            LongWritable.class,
            job);
    job.setNumReduceTasks(1);
    job.setCombinerClass(SummaryCombiner.class);
    job.setReducerClass(SummaryReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("Summary Generation Completed..");
        return 0;
    }
    return -1;
}
 
Example 14
Source File: HalyardBulkDelete.java    From Halyard with Apache License 2.0
@Override
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('t');
    TableMapReduceUtil.addDependencyJars(getConf(),
        HalyardExport.class,
        NTriplesUtil.class,
        Rio.class,
        AbstractRDFHandler.class,
        RDFFormat.class,
        RDFParser.class,
        HTable.class,
        HBaseConfiguration.class,
        AuthenticationProtos.class,
        Trace.class,
        Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardDelete " + source);
    if (cmd.hasOption('s')) {
        job.getConfiguration().set(SUBJECT, cmd.getOptionValue('s'));
    }
    if (cmd.hasOption('p')) {
        job.getConfiguration().set(PREDICATE, cmd.getOptionValue('p'));
    }
    if (cmd.hasOption('o')) {
        job.getConfiguration().set(OBJECT, cmd.getOptionValue('o'));
    }
    if (cmd.hasOption('g')) {
        job.getConfiguration().setStrings(CONTEXTS, cmd.getOptionValues('g'));
    }
    job.setJarByClass(HalyardBulkDelete.class);
    TableMapReduceUtil.initCredentials(job);

    Scan scan = HalyardTableUtils.scan(null, null);

    TableMapReduceUtil.initTableMapperJob(source,
        scan,
        DeleteMapper.class,
        ImmutableBytesWritable.class,
        LongWritable.class,
        job);

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setSpeculativeExecution(false);
    job.setMapSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), source, false, 0)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileOutputFormat.setOutputPath(job, new Path(cmd.getOptionValue('f')));
        TableMapReduceUtil.addDependencyJars(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(cmd.getOptionValue('f')), hTable);
            LOG.info("Bulk Delete Completed..");
            return 0;
        }
    }
    return -1;
}
 
Example 15
Source File: HalyardBulkExport.java    From Halyard with Apache License 2.0
@Override
protected int run(CommandLine cmd) throws Exception {
    if (!cmd.getArgList().isEmpty()) throw new HalyardExport.ExportException("Unknown arguments: " + cmd.getArgList().toString());
    String source = cmd.getOptionValue('s');
    String queryFiles = cmd.getOptionValue('q');
    String target = cmd.getOptionValue('t');
    if (!target.contains("{0}")) {
        throw new HalyardExport.ExportException("Bulk export target must contain '{0}' to be replaced by stripped filename of the actual SPARQL query.");
    }
    getConf().set(SOURCE, source);
    getConf().set(TARGET, target);
    String driver = cmd.getOptionValue('c');
    if (driver != null) {
        getConf().set(JDBC_DRIVER, driver);
    }
    String props[] = cmd.getOptionValues('p');
    if (props != null) {
        for (int i=0; i<props.length; i++) {
            props[i] = Base64.encodeBase64String(props[i].getBytes(StandardCharsets.UTF_8));
        }
        getConf().setStrings(JDBC_PROPERTIES, props);
    }
    if (cmd.hasOption('i')) getConf().set(HalyardBulkUpdate.ELASTIC_INDEX_URL, cmd.getOptionValue('i'));
    TableMapReduceUtil.addDependencyJars(getConf(),
           HalyardExport.class,
           NTriplesUtil.class,
           Rio.class,
           AbstractRDFHandler.class,
           RDFFormat.class,
           RDFParser.class,
           HTable.class,
           HBaseConfiguration.class,
           AuthenticationProtos.class,
           Trace.class,
           Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    String cp = cmd.getOptionValue('l');
    if (cp != null) {
        String jars[] = cp.split(":");
        StringBuilder newCp = new StringBuilder();
        for (int i=0; i<jars.length; i++) {
            if (i > 0) newCp.append(':');
            newCp.append(addTmpFile(jars[i])); // append classpath entries to tmpfiles and trim paths from the classpath
        }
        getConf().set(JDBC_CLASSPATH, newCp.toString());
    }
    Job job = Job.getInstance(getConf(), "HalyardBulkExport " + source + " -> " + target);
    job.setJarByClass(HalyardBulkExport.class);
    job.setMaxMapAttempts(1);
    job.setMapperClass(BulkExportMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Void.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(QueryInputFormat.class);
    QueryInputFormat.setQueriesFromDirRecursive(job.getConfiguration(), queryFiles, false, 0);
    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initCredentials(job);
    if (job.waitForCompletion(true)) {
        LOG.info("Bulk Export Completed..");
        return 0;
    }
    return -1;
}
 
Example 16
Source File: MultiHfileOutputFormat.java    From phoenix with Apache License 2.0
/**
 * Configures the job for MultiHfileOutputFormat.
 * @param job
 * @param tablesToBeLoaded
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public static void configureIncrementalLoad(Job job, List<TargetTableRef> tablesToBeLoaded) throws IOException {

    Configuration conf = job.getConfiguration();
    job.setOutputFormatClass(MultiHfileOutputFormat.class);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
            MutationSerialization.class.getName(), ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());

    // tableStartKeys for all tables.
    Set<TableRowkeyPair> tablesStartKeys = Sets.newTreeSet();
    for (TargetTableRef table : tablesToBeLoaded) {
        final String tableName = table.getPhysicalName();
        try (Connection hbaseConn = ConnectionFactory.createConnection(conf)) {
            Set<TableRowkeyPair> startKeys =
                    getRegionStartKeys(tableName,
                        hbaseConn.getRegionLocator(TableName.valueOf(tableName)));
            tablesStartKeys.addAll(startKeys);
            TableDescriptor tableDescriptor = hbaseConn.getTable(TableName.valueOf(tableName)).getDescriptor();
            String compressionConfig = configureCompression(tableDescriptor);
            String bloomTypeConfig = configureBloomType(tableDescriptor);
            String blockSizeConfig = configureBlockSize(tableDescriptor);
            String blockEncodingConfig = configureDataBlockEncoding(tableDescriptor);
            Map<String, String> tableConfigs = Maps.newHashMap();
            if (StringUtils.isNotBlank(compressionConfig)) {
                tableConfigs.put(COMPRESSION_FAMILIES_CONF_KEY, compressionConfig);
            }
            if (StringUtils.isNotBlank(bloomTypeConfig)) {
                tableConfigs.put(BLOOM_TYPE_FAMILIES_CONF_KEY, bloomTypeConfig);
            }
            if (StringUtils.isNotBlank(blockSizeConfig)) {
                tableConfigs.put(BLOCK_SIZE_FAMILIES_CONF_KEY, blockSizeConfig);
            }
            if (StringUtils.isNotBlank(blockEncodingConfig)) {
                tableConfigs.put(DATABLOCK_ENCODING_FAMILIES_CONF_KEY, blockEncodingConfig);
            }
            table.setConfiguration(tableConfigs);
            final String tableDefns = TargetTableRefFunctions.TO_JSON.apply(table);
            // set the table definition in the config to be used during the RecordWriter..
            conf.set(tableName, tableDefns);

            TargetTableRef tbl = TargetTableRefFunctions.FROM_JSON.apply(tableDefns);
            LOGGER.info(" the table logical name is " + tbl.getLogicalName());
        }
    }

    LOGGER.info("Configuring " + tablesStartKeys.size() + " reduce partitions to match current region count");
    job.setNumReduceTasks(tablesStartKeys.size());

    configurePartitioner(job, tablesStartKeys);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
}
 
Example 17
Source File: HalyardStats.java    From Halyard with Apache License 2.0
@Override
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    String targetGraph = cmd.getOptionValue('g');
    String graphContext = cmd.getOptionValue('c');
    String thresh = cmd.getOptionValue('r');
    TableMapReduceUtil.addDependencyJars(getConf(),
        HalyardExport.class,
        NTriplesUtil.class,
        Rio.class,
        AbstractRDFHandler.class,
        RDFFormat.class,
        RDFParser.class,
        HTable.class,
        HBaseConfiguration.class,
        AuthenticationProtos.class,
        Trace.class,
        Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardStats " + source + (target == null ? " update" : " -> " + target));
    job.getConfiguration().set(SOURCE, source);
    if (target != null) job.getConfiguration().set(TARGET, target);
    if (targetGraph != null) job.getConfiguration().set(TARGET_GRAPH, targetGraph);
    if (graphContext != null) job.getConfiguration().set(GRAPH_CONTEXT, graphContext);
    if (thresh != null) job.getConfiguration().setLong(THRESHOLD, Long.parseLong(thresh));
    job.setJarByClass(HalyardStats.class);
    TableMapReduceUtil.initCredentials(job);

    Scan scan = HalyardTableUtils.scan(null, null);
    if (graphContext != null) { //restricting stats to scan given graph context only
        List<RowRange> ranges = new ArrayList<>();
        byte[] gcHash = HalyardTableUtils.hashKey(SimpleValueFactory.getInstance().createIRI(graphContext));
        ranges.add(rowRange(HalyardTableUtils.CSPO_PREFIX, gcHash));
        ranges.add(rowRange(HalyardTableUtils.CPOS_PREFIX, gcHash));
        ranges.add(rowRange(HalyardTableUtils.COSP_PREFIX, gcHash));
        if (target == null) { //add stats context to the scanned row ranges (when in update mode) to delete the related stats during MapReduce
            ranges.add(rowRange(HalyardTableUtils.CSPO_PREFIX, HalyardTableUtils.hashKey(targetGraph == null ? HALYARD.STATS_GRAPH_CONTEXT : SimpleValueFactory.getInstance().createIRI(targetGraph))));
        }
        scan.setFilter(new MultiRowRangeFilter(ranges));
    }
    TableMapReduceUtil.initTableMapperJob(
        source,
        scan,
        StatsMapper.class,
        ImmutableBytesWritable.class,
        LongWritable.class,
        job);
    job.setPartitionerClass(StatsPartitioner.class);
    job.setReducerClass(StatsReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("Stats Generation Completed..");
        return 0;
    }
    return -1;
}
 
Example 18
Source File: IndexScrutinyTool.java    From phoenix with Apache License 2.0
public Job createSubmittableJob(String schemaName, String indexTable, String dataTable,
        SourceTable sourceTable, Class<IndexScrutinyMapperForTest> mapperClass) throws Exception {
    Preconditions.checkArgument(SourceTable.DATA_TABLE_SOURCE.equals(sourceTable)
            || SourceTable.INDEX_TABLE_SOURCE.equals(sourceTable));

    final String qDataTable = SchemaUtil.getQualifiedTableName(schemaName, dataTable);
    final String qIndexTable;
    if (schemaName != null && !schemaName.isEmpty()) {
        qIndexTable = SchemaUtil.getQualifiedTableName(schemaName, indexTable);
    } else {
        qIndexTable = indexTable;
    }
    PhoenixConfigurationUtil.setScrutinyDataTable(configuration, qDataTable);
    PhoenixConfigurationUtil.setScrutinyIndexTable(configuration, qIndexTable);
    PhoenixConfigurationUtil.setScrutinySourceTable(configuration, sourceTable);
    PhoenixConfigurationUtil.setScrutinyOutputInvalidRows(configuration, outputInvalidRows);
    PhoenixConfigurationUtil.setScrutinyOutputMax(configuration, outputMaxRows);

    final PTable pdataTable = PhoenixRuntime.getTable(connection, qDataTable);
    final PTable pindexTable = PhoenixRuntime.getTable(connection, qIndexTable);

    // set CURRENT_SCN for our scan so that incoming writes don't throw off scrutiny
    configuration.set(PhoenixConfigurationUtil.CURRENT_SCN_VALUE, Long.toString(ts));

    // set the source table to either data or index table
    SourceTargetColumnNames columnNames =
            SourceTable.DATA_TABLE_SOURCE.equals(sourceTable)
                    ? new SourceTargetColumnNames.DataSourceColNames(pdataTable,
                            pindexTable)
                    : new SourceTargetColumnNames.IndexSourceColNames(pdataTable,
                            pindexTable);
    String qSourceTable = columnNames.getQualifiedSourceTableName();
    List<String> sourceColumnNames = columnNames.getSourceColNames();
    List<String> sourceDynamicCols = columnNames.getSourceDynamicCols();
    List<String> targetDynamicCols = columnNames.getTargetDynamicCols();

    // Setup the select query against source - we either select the index columns from the
    // index table,
    // or select the data table equivalents of the index columns from the data table
    final String selectQuery =
            QueryUtil.constructSelectStatement(qSourceTable, sourceColumnNames, null,
                Hint.NO_INDEX, true);
    LOGGER.info("Query used on source table to feed the mapper: " + selectQuery);

    PhoenixConfigurationUtil.setScrutinyOutputFormat(configuration, outputFormat);
    // if outputting to table, setup the upsert to the output table
    if (outputInvalidRows && OutputFormat.TABLE.equals(outputFormat)) {
        String upsertStmt =
                IndexScrutinyTableOutput.constructOutputTableUpsert(sourceDynamicCols,
                    targetDynamicCols, connection);
        PhoenixConfigurationUtil.setUpsertStatement(configuration, upsertStmt);
        LOGGER.info("Upsert statement used for output table: " + upsertStmt);
    }

    final String jobName =
            String.format(INDEX_JOB_NAME_TEMPLATE, qSourceTable,
                columnNames.getQualifiedTargetTableName());
    final Job job = Job.getInstance(configuration, jobName);

    if (!useSnapshot) {
        PhoenixMapReduceUtil.setInput(job, PhoenixIndexDBWritable.class, qDataTable,
            selectQuery);
    } else { // TODO check if using a snapshot works
        Admin admin = null;
        String snapshotName;
        try {
            final PhoenixConnection pConnection =
                    connection.unwrap(PhoenixConnection.class);
            admin = pConnection.getQueryServices().getAdmin();
            String pdataTableName = pdataTable.getName().getString();
            snapshotName = new StringBuilder(pdataTableName).append("-Snapshot").toString();
            admin.snapshot(snapshotName, TableName.valueOf(pdataTableName));
        } finally {
            if (admin != null) {
                admin.close();
            }
        }
        // root dir not a subdirectory of hbase dir
        Path rootDir = new Path("hdfs:///index-snapshot-dir");
        FSUtils.setRootDir(configuration, rootDir);

        // set input for map reduce job using hbase snapshots
        //PhoenixMapReduceUtil.setInput(job, PhoenixIndexDBWritable.class, snapshotName,
        //    qDataTable, restoreDir, selectQuery);
    }
    TableMapReduceUtil.initCredentials(job);
    Path outputPath =
            getOutputPath(configuration, basePath,
                SourceTable.DATA_TABLE_SOURCE.equals(sourceTable) ? pdataTable
                        : pindexTable);

    return configureSubmittableJob(job, outputPath, mapperClass);
}
 
Example 19
Source File: IndexTool.java    From phoenix with Apache License 2.0
private Job configureJobForAsyncIndex() throws Exception {
    String physicalIndexTable = pIndexTable.getPhysicalName().getString();
    final PhoenixConnection pConnection = connection.unwrap(PhoenixConnection.class);
    final PostIndexDDLCompiler ddlCompiler =
            new PostIndexDDLCompiler(pConnection, new TableRef(pDataTable));
    ddlCompiler.compile(pIndexTable);
    final List<String> indexColumns = ddlCompiler.getIndexColumnNames();
    final String selectQuery = ddlCompiler.getSelectQuery();
    final String upsertQuery =
            QueryUtil.constructUpsertStatement(qIndexTable, indexColumns, Hint.NO_INDEX);

    configuration.set(PhoenixConfigurationUtil.UPSERT_STATEMENT, upsertQuery);
    PhoenixConfigurationUtil.setPhysicalTableName(configuration, physicalIndexTable);
    PhoenixConfigurationUtil.setDisableIndexes(configuration, indexTable);

    PhoenixConfigurationUtil.setUpsertColumnNames(configuration,
        indexColumns.toArray(new String[indexColumns.size()]));
    if (tenantId != null) {
        PhoenixConfigurationUtil.setTenantId(configuration, tenantId);
    }
    final List<ColumnInfo> columnMetadataList =
            PhoenixRuntime.generateColumnInfo(connection, qIndexTable, indexColumns);
    ColumnInfoToStringEncoderDecoder.encode(configuration, columnMetadataList);

    if (outputPath != null) {
        fs = outputPath.getFileSystem(configuration);
        fs.delete(outputPath, true);
    }
    final String jobName = String.format(INDEX_JOB_NAME_TEMPLATE, schemaName, dataTable, indexTable);
    final Job job = Job.getInstance(configuration, jobName);
    job.setJarByClass(IndexTool.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    if (outputPath != null) {
        FileOutputFormat.setOutputPath(job, outputPath);
    }

    if (!useSnapshot) {
        PhoenixMapReduceUtil.setInput(job, PhoenixIndexDBWritable.class, qDataTable, selectQuery);
    } else {
        Admin admin = null;
        String snapshotName;
        try {
            admin = pConnection.getQueryServices().getAdmin();
            String pdataTableName = pDataTable.getName().getString();
            snapshotName = new StringBuilder(pdataTableName).append("-Snapshot").toString();
            admin.snapshot(snapshotName, TableName.valueOf(pdataTableName));
        } finally {
            if (admin != null) {
                admin.close();
            }
        }
        // root dir not a subdirectory of hbase dir
        Path rootDir = new Path("hdfs:///index-snapshot-dir");
        FSUtils.setRootDir(configuration, rootDir);
        Path restoreDir = new Path(FSUtils.getRootDir(configuration), "restore-dir");

        // set input for map reduce job using hbase snapshots
        PhoenixMapReduceUtil
                    .setInput(job, PhoenixIndexDBWritable.class, snapshotName, qDataTable, restoreDir, selectQuery);
    }
    TableMapReduceUtil.initCredentials(job);
    
    job.setMapperClass(PhoenixIndexImportDirectMapper.class);
    return configureSubmittableJobUsingDirectApi(job);
}
 
Example 20
Source File: IndexTool.java    From phoenix with Apache License 2.0
private Job configureJobForServerBuildIndex() throws Exception {
    long indexRebuildQueryTimeoutMs =
            configuration.getLong(QueryServices.INDEX_REBUILD_QUERY_TIMEOUT_ATTRIB,
                    QueryServicesOptions.DEFAULT_INDEX_REBUILD_QUERY_TIMEOUT);
    long indexRebuildRPCTimeoutMs =
            configuration.getLong(QueryServices.INDEX_REBUILD_RPC_TIMEOUT_ATTRIB,
                    QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_TIMEOUT);
    long indexRebuildClientScannerTimeOutMs =
            configuration.getLong(QueryServices.INDEX_REBUILD_CLIENT_SCANNER_TIMEOUT_ATTRIB,
                    QueryServicesOptions.DEFAULT_INDEX_REBUILD_CLIENT_SCANNER_TIMEOUT);
    int indexRebuildRpcRetriesCounter =
            configuration.getInt(QueryServices.INDEX_REBUILD_RPC_RETRIES_COUNTER,
                    QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER);
    // Set various phoenix and hbase level timeouts and rpc retries
    configuration.set(QueryServices.THREAD_TIMEOUT_MS_ATTRIB,
            Long.toString(indexRebuildQueryTimeoutMs));
    configuration.set(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD,
            Long.toString(indexRebuildClientScannerTimeOutMs));
    configuration.set(HConstants.HBASE_RPC_TIMEOUT_KEY,
            Long.toString(indexRebuildRPCTimeoutMs));
    configuration.set(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
            Long.toString(indexRebuildRpcRetriesCounter));
    configuration.set("mapreduce.task.timeout", Long.toString(indexRebuildQueryTimeoutMs));

    PhoenixConfigurationUtil.setIndexToolDataTableName(configuration, qDataTable);
    PhoenixConfigurationUtil.setIndexToolIndexTableName(configuration, qIndexTable);
    if (startTime != null) {
        PhoenixConfigurationUtil.setIndexToolStartTime(configuration, startTime);
    }
    PhoenixConfigurationUtil.setIndexVerifyType(configuration, indexVerifyType);
    PhoenixConfigurationUtil.setDisableLoggingVerifyType(configuration, disableLoggingType);
    String physicalIndexTable = pIndexTable.getPhysicalName().getString();

    PhoenixConfigurationUtil.setPhysicalTableName(configuration, physicalIndexTable);
    PhoenixConfigurationUtil.setDisableIndexes(configuration, indexTable);
    if (tenantId != null) {
        PhoenixConfigurationUtil.setTenantId(configuration, tenantId);
    }

    if (outputPath != null) {
        fs = outputPath.getFileSystem(configuration);
        fs.delete(outputPath, true);
    }
    final String jobName = String.format(INDEX_JOB_NAME_TEMPLATE, schemaName, dataTable, indexTable);
    final Job job = Job.getInstance(configuration, jobName);
    job.setJarByClass(IndexTool.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    if (outputPath != null) {
        FileOutputFormat.setOutputPath(job, outputPath);
    }

    PhoenixMapReduceUtil.setInput(job, PhoenixServerBuildIndexDBWritable.class, PhoenixServerBuildIndexInputFormat.class,
                    qDataTable, "");

    TableMapReduceUtil.initCredentials(job);
    job.setMapperClass(PhoenixServerBuildIndexMapper.class);
    return configureSubmittableJobUsingDirectApi(job);
}