Java Code Examples for org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars()

The following examples show how to use org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(). Each example is taken from an open-source project; the project and source file are noted above each snippet.
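All of the examples share the same basic shape: build and configure a Job, then call TableMapReduceUtil.addDependencyJars(job) so that the HBase client jars, together with the jars of the job's configured input/output, mapper and key/value classes, are shipped to the cluster through the distributed cache, and usually TableMapReduceUtil.initCredentials(job) to obtain HBase delegation tokens on secure clusters. The sketch below is a minimal, hypothetical illustration of that recurring pattern only; the class name, the table name "my_table" and the no-op mapper are placeholders, not taken from any of the projects listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class AddDependencyJarsSketch {

    // Placeholder mapper; a real job would emit something useful.
    static class NoOpMapper extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context) {
            // intentionally empty
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "addDependencyJars sketch");
        job.setJarByClass(AddDependencyJarsSketch.class);

        // initTableMapperJob wires TableInputFormat and, by default, already adds
        // dependency jars; the explicit calls below mirror the examples on this page.
        TableMapReduceUtil.initTableMapperJob("my_table", new Scan(), NoOpMapper.class,
                ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setNumReduceTasks(0);

        // Ship the HBase client jars (and the job's configured classes) to the
        // cluster via the distributed cache, then obtain HBase delegation tokens
        // so the tasks can talk to a secure cluster.
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}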
Example 1
Source File: HFileOutputFormat3.java    From kylin-on-parquet-v2 with Apache License 2.0
public static void configureIncrementalLoadMap(Job job, Table table) throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormat3.class);

    // Set compression algorithms based on column families
    HTableDescriptor tableDescriptor = table.getTableDescriptor();
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + table.getName() + " output configured.");
}
 
Example 2
Source File: IntegrationTestLoadAndVerify.java    From hbase with Apache License 2.0
protected Job doLoad(Configuration conf, TableDescriptor tableDescriptor) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "load-output");
  LOG.info("Load output dir: " + outputDir);

  NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
  conf.set(TABLE_NAME_KEY, tableDescriptor.getTableName().getNameAsString());

  Job job = Job.getInstance(conf);
  job.setJobName(TEST_NAME + " Load for " + tableDescriptor.getTableName());
  job.setJarByClass(this.getClass());
  setMapperClass(job);
  job.setInputFormatClass(NMapInputFormat.class);
  job.setNumReduceTasks(0);
  setJobScannerConf(job);
  FileOutputFormat.setOutputPath(job, outputDir);

  TableMapReduceUtil.addDependencyJars(job);

  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);
  assertTrue(job.waitForCompletion(true));
  return job;
}
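Example 2 pairs addDependencyJars(job) with addDependencyJarsForClasses, which ships the jars containing explicitly named classes, here AbstractHBaseTool, that the tasks need but that are not referenced by the job's input/output/mapper configuration. Below is a minimal, hypothetical sketch of that combination; Guava's Preconditions merely stands in for "some third-party class the map/reduce code uses".

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;

public class ShipToolClassesSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(HBaseConfiguration.create(), "ship tool classes");
        // addDependencyJars(job) covers HBase itself plus the classes the job is
        // already configured with (input/output formats, mapper output key/value, ...).
        TableMapReduceUtil.addDependencyJars(job);
        // addDependencyJarsForClasses additionally ships the jars of classes the
        // tasks reference directly; Guava's Preconditions is only a placeholder here.
        TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
            com.google.common.base.Preconditions.class);
    }
}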
 
Example 3
Source File: IndexTool.java    From phoenix with Apache License 2.0
private Job configureSubmittableJobUsingDirectApi(Job job) throws Exception {
    job.setReducerClass(PhoenixIndexImportDirectReducer.class);
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    // Set the Physical Table name for use in DirectHTableWriter#write(Mutation)
    conf.set(TableOutputFormat.OUTPUT_TABLE,
        PhoenixConfigurationUtil.getPhysicalTableName(job.getConfiguration()));
    //Set the Output classes
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    TableMapReduceUtil.addDependencyJars(job);
    job.setNumReduceTasks(1);
    return job;
}
 
Example 4
Source File: HFileOutputFormat3.java    From kylin with Apache License 2.0
public static void configureIncrementalLoadMap(Job job, Table table) throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(HFileOutputFormat3.class);

    // Set compression algorithms based on column families
    HTableDescriptor tableDescriptor = table.getTableDescriptor();
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + table.getName() + " output configured.");
}
 
Example 5
Source File: IndexScrutinyTool.java    From phoenix with Apache License 2.0
private Job configureSubmittableJob(Job job, Path outputPath, Class<IndexScrutinyMapperForTest> mapperClass) throws Exception {
    Configuration conf = job.getConfiguration();
    conf.setBoolean("mapreduce.job.user.classpath.first", true);
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    job.setJarByClass(IndexScrutinyTool.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (outputInvalidRows && OutputFormat.FILE.equals(outputFormat)) {
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, outputPath);
    }
    job.setMapperClass((mapperClass == null ? IndexScrutinyMapper.class : mapperClass));
    job.setNumReduceTasks(0);
    // Set the Output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    TableMapReduceUtil.addDependencyJars(job);
    return job;
}
 
Example 6
Source File: IndexTool.java    From hgraphdb with Apache License 2.0
/**
 * Uses the HBase Front Door Api to write to index table. Submits the job and either returns or
 * waits for the job completion based on runForeground parameter.
 * 
 * @param job job
 * @param outputPath output path
 * @param runForeground - if true, waits for job completion, else submits and returns
 *            immediately.
 * @throws Exception
 */
private void configureSubmittableJobUsingDirectApi(Job job, Path outputPath, TableName outputTableName,
                                                   boolean skipDependencyJars, boolean runForeground)
        throws Exception {
    job.setMapperClass(getDirectMapperClass());
    job.setReducerClass(getDirectReducerClass());
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableOutputFormat.OUTPUT_TABLE, outputTableName.getNameAsString());

    //Set the Output classes
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    if (!skipDependencyJars) {
        TableMapReduceUtil.addDependencyJars(job);
    }
    job.setNumReduceTasks(1);

    if (!runForeground) {
        LOG.info("Running Index Build in Background - Submit async and exit");
        job.submit();
        return;
    }
    LOG.info("Running Index Build in Foreground. Waits for the build to complete. This may take a long time!.");
    boolean result = job.waitForCompletion(true);
    if (!result) {
        LOG.error("IndexTool job failed!");
        throw new Exception("IndexTool job failed: " + job.toString());
    }
    FileSystem.get(conf).delete(outputPath, true);
}
 
Example 7
Source File: UpdateStatisticsTool.java    From phoenix with Apache License 2.0
private void configureJob() throws Exception {
    job = Job.getInstance(getConf(),
            "UpdateStatistics-" + tableName + "-" + snapshotName);
    PhoenixMapReduceUtil.setInput(job, NullDBWritable.class,
            snapshotName, tableName, restoreDir);

    PhoenixConfigurationUtil.setMRJobType(job.getConfiguration(), MRJobType.UPDATE_STATS);

    // DO NOT allow mapper splits using statistics since it may result into many smaller chunks
    PhoenixConfigurationUtil.setSplitByStats(job.getConfiguration(), false);

    job.setJarByClass(UpdateStatisticsTool.class);
    job.setMapperClass(TableSnapshotMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    job.setPriority(this.jobPriority);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), PhoenixConnection.class, Chronology.class,
            CharStream.class, TransactionSystemClient.class, TransactionNotInProgressException.class,
            ZKClient.class, DiscoveryServiceClient.class, ZKDiscoveryService.class,
            Cancellable.class, TTransportException.class, SpanReceiver.class, TransactionProcessor.class, Gauge.class, MetricRegistriesImpl.class);
    LOGGER.info("UpdateStatisticsTool running for: " + tableName
            + " on snapshot: " + snapshotName + " with restore dir: " + restoreDir);
}
 
Example 8
Source File: PerformanceEvaluation.java    From hbase with Apache License 2.0
/**
 * Run a mapreduce job.  Run as many maps as asked-for clients.
 * Before we start up the job, write out an input file with instruction
 * per client regards which row they are to start on.
 * @param cmd Command to run.
 */
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
 
Example 9
Source File: MapReduceHFileSplitterJob.java    From hbase with Apache License 2.0
/**
 * Sets up the actual job.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public Job createSubmittableJob(String[] args) throws IOException {
  Configuration conf = getConf();
  String inputDirs = args[0];
  String tabName = args[1];
  conf.setStrings(TABLES_KEY, tabName);
  conf.set(FileInputFormat.INPUT_DIR, inputDirs);
  Job job =
      Job.getInstance(conf,
        conf.get(JOB_NAME_CONF_KEY, NAME + "_" + EnvironmentEdgeManager.currentTime()));
  job.setJarByClass(MapReduceHFileSplitterJob.class);
  job.setInputFormatClass(HFileInputFormat.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
  if (hfileOutPath != null) {
    LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);
    TableName tableName = TableName.valueOf(tabName);
    job.setMapperClass(HFileCellMapper.class);
    job.setReducerClass(CellSortReducer.class);
    Path outputDir = new Path(hfileOutPath);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setMapOutputValueClass(MapReduceExtendedCell.class);
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Table table = conn.getTable(tableName);
        RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
      HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
    }
    LOG.debug("success configuring load incremental job");

    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
  } else {
    throw new IOException("No bulk output directory specified");
  }
  return job;
}
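Example 9 uses the older addDependencyJars(Configuration, Class...) overload, which resolves the jar containing each listed class and appends it to the job's "tmpjars" setting; the Halyard examples further down use the same overload against getConf() before the Job is created. Newer HBase releases deprecate this overload in favour of addDependencyJarsForClasses (see Example 2). The snippet below is a hypothetical sketch of the call only; the shaded Preconditions class is just a stand-in for a real third-party dependency.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

public class ShipThirdPartyJarsSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Resolves the jar that contains each listed class and adds it to the
        // "tmpjars" entry of the configuration (the distributed cache jar list).
        // The class below is a placeholder for whatever the tasks really need.
        TableMapReduceUtil.addDependencyJars(conf,
            org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
        System.out.println("tmpjars = " + conf.get("tmpjars"));
    }
}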
 
Example 10
Source File: CompactionTool.java    From hbase with Apache License 2.0
/**
 * Execute compaction, using a Map-Reduce job.
 */
private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
    final boolean compactOnce, final boolean major) throws Exception {
  Configuration conf = getConf();
  conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);
  conf.setBoolean(CONF_COMPACT_MAJOR, major);

  Job job = new Job(conf);
  job.setJobName("CompactionTool");
  job.setJarByClass(CompactionTool.class);
  job.setMapperClass(CompactionMapper.class);
  job.setInputFormatClass(CompactionInputFormat.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapSpeculativeExecution(false);
  job.setNumReduceTasks(0);

  // add dependencies (including HBase ones)
  TableMapReduceUtil.addDependencyJars(job);

  Path stagingDir = JobUtil.getQualifiedStagingDir(conf);
  FileSystem stagingFs = stagingDir.getFileSystem(conf);
  try {
    // Create input file with the store dirs
    Path inputPath = new Path(stagingDir, "compact-"+ EnvironmentEdgeManager.currentTime());
    List<Path> storeDirs = CompactionInputFormat.createInputFile(fs, stagingFs,
        inputPath, toCompactDirs);
    CompactionInputFormat.addInputPath(job, inputPath);

    // Initialize credential for secure cluster
    TableMapReduceUtil.initCredentials(job);
    // Despite the method name this will get delegation token for the filesystem
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      storeDirs.toArray(new Path[0]), conf);

    // Start the MR Job and wait
    return job.waitForCompletion(true) ? 0 : 1;
  } finally {
    fs.delete(stagingDir, true);
  }
}
 
Example 11
Source File: IntegrationTestBigLinkedListWithVisibility.java    From hbase with Apache License 2.0
private int doVerify(Path outputDir, int numReducers) throws IOException, InterruptedException,
    ClassNotFoundException {
  job = new Job(getConf());

  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());

  setJobScannerConf(job);

  Scan scan = new Scan();
  scan.addColumn(FAMILY_NAME, COLUMN_PREV);
  scan.setCaching(10000);
  scan.setCacheBlocks(false);
  String[] split = labels.split(COMMA);

  scan.setAuthorizations(new Authorizations(split[this.labelIndex * 2],
      split[(this.labelIndex * 2) + 1]));

  TableMapReduceUtil.initTableMapperJob(tableName.getName(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);

  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);
  boolean success = job.waitForCompletion(true);

  return success ? 0 : 1;
}
 
Example 12
Source File: IntegrationTestBigLinkedList.java    From hbase with Apache License 2.0
public int runGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMultiplier, Integer numWalkers)
    throws Exception {
  LOG.info("Running Generator with numMappers=" + numMappers +", numNodes=" + numNodes);
  createSchema();
  job = Job.getInstance(getConf());

  job.setJobName("Link Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  FileInputFormat.setInputPaths(job, tmpOutput);
  job.setInputFormatClass(OneFilePerMapperSFIF.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);

  setMapperForGenerator(job);

  job.setOutputFormatClass(NullOutputFormat.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
                                                 AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}
 
Example 13
Source File: HalyardBulkExport.java    From Halyard with Apache License 2.0
@Override
protected int run(CommandLine cmd) throws Exception {
    if (!cmd.getArgList().isEmpty()) throw new HalyardExport.ExportException("Unknown arguments: " + cmd.getArgList().toString());
    String source = cmd.getOptionValue('s');
    String queryFiles = cmd.getOptionValue('q');
    String target = cmd.getOptionValue('t');
    if (!target.contains("{0}")) {
        throw new HalyardExport.ExportException("Bulk export target must contain '{0}' to be replaced by stripped filename of the actual SPARQL query.");
    }
    getConf().set(SOURCE, source);
    getConf().set(TARGET, target);
    String driver = cmd.getOptionValue('c');
    if (driver != null) {
        getConf().set(JDBC_DRIVER, driver);
    }
    String props[] = cmd.getOptionValues('p');
    if (props != null) {
        for (int i=0; i<props.length; i++) {
            props[i] = Base64.encodeBase64String(props[i].getBytes(StandardCharsets.UTF_8));
        }
        getConf().setStrings(JDBC_PROPERTIES, props);
    }
    if (cmd.hasOption('i')) getConf().set(HalyardBulkUpdate.ELASTIC_INDEX_URL, cmd.getOptionValue('i'));
    TableMapReduceUtil.addDependencyJars(getConf(),
           HalyardExport.class,
           NTriplesUtil.class,
           Rio.class,
           AbstractRDFHandler.class,
           RDFFormat.class,
           RDFParser.class,
           HTable.class,
           HBaseConfiguration.class,
           AuthenticationProtos.class,
           Trace.class,
           Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    String cp = cmd.getOptionValue('l');
    if (cp != null) {
        String jars[] = cp.split(":");
        StringBuilder newCp = new StringBuilder();
        for (int i=0; i<jars.length; i++) {
            if (i > 0) newCp.append(':');
            newCp.append(addTmpFile(jars[i])); // append classpath entries to tmpfiles and trim paths from the classpath
        }
        getConf().set(JDBC_CLASSPATH, newCp.toString());
    }
    Job job = Job.getInstance(getConf(), "HalyardBulkExport " + source + " -> " + target);
    job.setJarByClass(HalyardBulkExport.class);
    job.setMaxMapAttempts(1);
    job.setMapperClass(BulkExportMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Void.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(QueryInputFormat.class);
    QueryInputFormat.setQueriesFromDirRecursive(job.getConfiguration(), queryFiles, false, 0);
    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initCredentials(job);
    if (job.waitForCompletion(true)) {
        LOG.info("Bulk Export Completed..");
        return 0;
    }
    return -1;
}
 
Example 14
Source File: MultiHfileOutputFormat.java    From phoenix with Apache License 2.0
/**
 * Configures the job for MultiHfileOutputFormat.
 * @param job
 * @param tablesToBeLoaded
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public static void configureIncrementalLoad(Job job, List<TargetTableRef> tablesToBeLoaded) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setOutputFormatClass(MultiHfileOutputFormat.class);
    conf.setStrings("io.serializations", conf.get("io.serializations"),
            MutationSerialization.class.getName(), ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());

    // tableStartKeys for all tables.
    Set<TableRowkeyPair> tablesStartKeys = Sets.newTreeSet();
    for (TargetTableRef table : tablesToBeLoaded) {
        final String tableName = table.getPhysicalName();
        try (Connection hbaseConn = ConnectionFactory.createConnection(conf)) {
            Set<TableRowkeyPair> startKeys =
                    getRegionStartKeys(tableName,
                        hbaseConn.getRegionLocator(TableName.valueOf(tableName)));
            tablesStartKeys.addAll(startKeys);
            TableDescriptor tableDescriptor = hbaseConn.getTable(TableName.valueOf(tableName)).getDescriptor();
            String compressionConfig = configureCompression(tableDescriptor);
            String bloomTypeConfig = configureBloomType(tableDescriptor);
            String blockSizeConfig = configureBlockSize(tableDescriptor);
            String blockEncodingConfig = configureDataBlockEncoding(tableDescriptor);
            Map<String, String> tableConfigs = Maps.newHashMap();
            if (StringUtils.isNotBlank(compressionConfig)) {
                tableConfigs.put(COMPRESSION_FAMILIES_CONF_KEY, compressionConfig);
            }
            if (StringUtils.isNotBlank(bloomTypeConfig)) {
                tableConfigs.put(BLOOM_TYPE_FAMILIES_CONF_KEY, bloomTypeConfig);
            }
            if (StringUtils.isNotBlank(blockSizeConfig)) {
                tableConfigs.put(BLOCK_SIZE_FAMILIES_CONF_KEY, blockSizeConfig);
            }
            if (StringUtils.isNotBlank(blockEncodingConfig)) {
                tableConfigs.put(DATABLOCK_ENCODING_FAMILIES_CONF_KEY, blockEncodingConfig);
            }
            table.setConfiguration(tableConfigs);
            final String tableDefns = TargetTableRefFunctions.TO_JSON.apply(table);
            // set the table definition in the config to be used during the RecordWriter..
            conf.set(tableName, tableDefns);

            TargetTableRef tbl = TargetTableRefFunctions.FROM_JSON.apply(tableDefns);
            LOGGER.info(" the table logical name is " + tbl.getLogicalName());
        }
    }

    LOGGER.info("Configuring " + tablesStartKeys.size() + " reduce partitions to match current region count");
    job.setNumReduceTasks(tablesStartKeys.size());

    configurePartitioner(job, tablesStartKeys);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
}
 
Example 15
Source File: HalyardSummary.java    From Halyard with Apache License 2.0
@Override
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    TableMapReduceUtil.addDependencyJars(getConf(),
           HalyardExport.class,
           Rio.class,
           AbstractRDFHandler.class,
           RDFFormat.class,
           RDFParser.class,
           HTable.class,
           HBaseConfiguration.class,
           AuthenticationProtos.class,
           Trace.class,
           Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardSummary " + source + (target == null ? " update" : " -> " + target));
    job.getConfiguration().set(SOURCE, source);
    if (target != null) job.getConfiguration().set(TARGET, target);
    if (cmd.hasOption('g')) job.getConfiguration().set(TARGET_GRAPH, cmd.getOptionValue('g'));
    if (cmd.hasOption('d')) job.getConfiguration().setInt(DECIMATION_FACTOR, Integer.parseInt(cmd.getOptionValue('d')));
    job.setJarByClass(HalyardSummary.class);
    TableMapReduceUtil.initCredentials(job);

    Scan scan = HalyardTableUtils.scan(new byte[]{HalyardTableUtils.POS_PREFIX}, new byte[]{HalyardTableUtils.POS_PREFIX + 1});

    TableMapReduceUtil.initTableMapperJob(source,
            scan,
            SummaryMapper.class,
            ImmutableBytesWritable.class,
            LongWritable.class,
            job);
    job.setNumReduceTasks(1);
    job.setCombinerClass(SummaryCombiner.class);
    job.setReducerClass(SummaryReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("Summary Generation Completed..");
        return 0;
    }
    return -1;
}
 
Example 16
Source File: HalyardPreSplit.java    From Halyard with Apache License 2.0
@Override
protected int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    try (Connection con = ConnectionFactory.createConnection(getConf())) {
        try (Admin admin = con.getAdmin()) {
            if (admin.tableExists(TableName.valueOf(target))) {
                LOG.log(Level.WARNING, "Pre-split cannot modify already existing table {0}", target);
                return -1;
            }
        }
    }
    getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
    if (cmd.hasOption('g')) getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
    getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    getConf().setInt(DECIMATION_FACTOR_PROPERTY, Integer.parseInt(cmd.getOptionValue('d', String.valueOf(DEFAULT_DECIMATION_FACTOR))));
    getConf().setLong(SPLIT_LIMIT_PROPERTY, Long.parseLong(cmd.getOptionValue('l', String.valueOf(DEFAULT_SPLIT_LIMIT))));
    Job job = Job.getInstance(getConf(), "HalyardPreSplit -> " + target);
    job.getConfiguration().set(TABLE_PROPERTY, target);
    job.setJarByClass(HalyardPreSplit.class);
    job.setMapperClass(RDFDecimatingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, source);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    job.setReducerClass(PreSplitReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("PreSplit Calculation Completed..");
        return 0;
    }
    return -1;
}
 
Example 17
Source File: HalyardBulkLoad.java    From Halyard with Apache License 2.0
@Override
protected int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String workdir = cmd.getOptionValue('w');
    String target = cmd.getOptionValue('t');
    getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
    getConf().setBoolean(VERIFY_DATATYPE_VALUES_PROPERTY, cmd.hasOption('d'));
    getConf().setBoolean(TRUNCATE_PROPERTY, cmd.hasOption('r'));
    getConf().setInt(SPLIT_BITS_PROPERTY, Integer.parseInt(cmd.getOptionValue('b', "3")));
    if (cmd.hasOption('g')) getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
    getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, Long.parseLong(cmd.getOptionValue('e', String.valueOf(System.currentTimeMillis()))));
    if (cmd.hasOption('m')) getConf().setLong("mapreduce.input.fileinputformat.split.maxsize", Long.parseLong(cmd.getOptionValue('m')));
    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + workdir + " -> " + target);
    job.setJarByClass(HalyardBulkLoad.class);
    job.setMapperClass(RDFMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), target, true, getConf().getInt(SPLIT_BITS_PROPERTY, 3))) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, source);
        FileOutputFormat.setOutputPath(job, new Path(workdir));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            if (getConf().getBoolean(TRUNCATE_PROPERTY, false)) {
                HalyardTableUtils.truncateTable(hTable).close();
            }
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(workdir), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}
 
Example 18
Source File: HalyardBulkUpdate.java    From Halyard with Apache License 2.0
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String queryFiles = cmd.getOptionValue('q');
    String workdir = cmd.getOptionValue('w');
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, Long.parseLong(cmd.getOptionValue('e', String.valueOf(System.currentTimeMillis()))));
    if (cmd.hasOption('i')) getConf().set(ELASTIC_INDEX_URL, cmd.getOptionValue('i'));
    TableMapReduceUtil.addDependencyJars(getConf(),
           HalyardExport.class,
           NTriplesUtil.class,
           Rio.class,
           AbstractRDFHandler.class,
           RDFFormat.class,
           RDFParser.class,
           HTable.class,
           HBaseConfiguration.class,
           AuthenticationProtos.class,
           Trace.class,
           Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setStrings(TABLE_NAME_PROPERTY, source);
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    int stages = 1;
    for (int stage = 0; stage < stages; stage++) {
        Job job = Job.getInstance(getConf(), "HalyardBulkUpdate -> " + workdir + " -> " + source + " stage #" + stage);
        job.getConfiguration().setInt(STAGE_PROPERTY, stage);
        job.setJarByClass(HalyardBulkUpdate.class);
        job.setMapperClass(SPARQLUpdateMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);
        job.setInputFormatClass(QueryInputFormat.class);
        job.setSpeculativeExecution(false);
        job.setReduceSpeculativeExecution(false);
        try (HTable hTable = HalyardTableUtils.getTable(getConf(), source, false, 0)) {
            HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
            QueryInputFormat.setQueriesFromDirRecursive(job.getConfiguration(), queryFiles, true, stage);
            Path outPath = new Path(workdir, "stage"+stage);
            FileOutputFormat.setOutputPath(job, outPath);
            TableMapReduceUtil.addDependencyJars(job);
            TableMapReduceUtil.initCredentials(job);
            if (stage == 0) { //count real number of stages
                for (InputSplit is : new QueryInputFormat().getSplits(job)) {
                    QueryInputFormat.QueryInputSplit qis = (QueryInputFormat.QueryInputSplit)is;
                    int updates = QueryParserUtil.parseUpdate(QueryLanguage.SPARQL, qis.getQuery(), null).getUpdateExprs().size();
                    if (updates > stages) {
                        stages = updates;
                    }
                    LOG.log(Level.INFO, "{0} contains {1} stages of the update sequence.", new Object[]{qis.getQueryName(), updates});
                }
                LOG.log(Level.INFO, "Bulk Update will process {0} MapReduce stages.", stages);
            }
            if (job.waitForCompletion(true)) {
                new LoadIncrementalHFiles(getConf()).doBulkLoad(outPath, hTable);
                LOG.log(Level.INFO, "Stage #{0} of {1} completed..", new Object[]{stage, stages});
            } else {
                return -1;
            }
        }
    }
    LOG.info("Bulk Update Completed..");
    return 0;
}
 
Example 19
Source File: HFileOutputFormat3.java    From kylin with Apache License 2.0
static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator,
        Class<? extends OutputFormat<?, ?>> cls) throws IOException, UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(cls);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
        // record this table name for creating writer by favored nodes
        LOG.info("bulkload locality sensitive enabled");
        conf.set(OUTPUT_TABLE_NAME_CONF_KEY, regionLocator.getName().getNameAsString());
    }
    
    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + tableDescriptor.getTableName());
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
}
 
Example 20
Source File: ExportSnapshot.java    From hbase with Apache License 2.0
/**
 * Run Map-Reduce Job to perform the files copy.
 */
private void runCopyJob(final Path inputRoot, final Path outputRoot,
    final String snapshotName, final Path snapshotDir, final boolean verifyChecksum,
    final String filesUser, final String filesGroup, final int filesMode,
    final int mappers, final int bandwidthMB)
        throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup);
  if (filesUser != null) conf.set(CONF_FILES_USER, filesUser);
  if (mappers > 0) {
    conf.setInt(CONF_NUM_SPLITS, mappers);
    conf.setInt(MR_NUM_MAPS, mappers);
  }
  conf.setInt(CONF_FILES_MODE, filesMode);
  conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum);
  conf.set(CONF_OUTPUT_ROOT, outputRoot.toString());
  conf.set(CONF_INPUT_ROOT, inputRoot.toString());
  conf.setInt(CONF_BANDWIDTH_MB, bandwidthMB);
  conf.set(CONF_SNAPSHOT_NAME, snapshotName);
  conf.set(CONF_SNAPSHOT_DIR, snapshotDir.toString());

  String jobname = conf.get(CONF_MR_JOB_NAME, "ExportSnapshot-" + snapshotName);
  Job job = new Job(conf);
  job.setJobName(jobname);
  job.setJarByClass(ExportSnapshot.class);
  TableMapReduceUtil.addDependencyJars(job);
  job.setMapperClass(ExportMapper.class);
  job.setInputFormatClass(ExportSnapshotInputFormat.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapSpeculativeExecution(false);
  job.setNumReduceTasks(0);

  // Acquire the delegation Tokens
  Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
    new Path[] { inputRoot }, srcConf);
  Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outputRoot }, destConf);

  // Run the MR Job
  if (!job.waitForCompletion(true)) {
    throw new ExportSnapshotException(job.getStatus().getFailureInfo());
  }
}