Java Code Examples for org.apache.hadoop.hbase.HBaseConfiguration#addHbaseResources()

The following examples show how to use org.apache.hadoop.hbase.HBaseConfiguration#addHbaseResources() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IndexUpgradeTool.java    From phoenix with Apache License 2.0 6 votes vote down vote up
@VisibleForTesting
public int executeTool() {
    Configuration conf = HBaseConfiguration.addHbaseResources(getConf());

    try (Connection conn = getConnection(conf)) {
        ConnectionQueryServices queryServices = conn.unwrap(PhoenixConnection.class)
                .getQueryServices();

        boolean status = extractTablesAndIndexes(conn.unwrap(PhoenixConnection.class));

        if (status) {
            return executeTool(conn, queryServices, conf);
        }
    } catch (SQLException e) {
        LOGGER.severe("Something went wrong in executing tool "+ e);
    }
    return -1;
}
 
Example 2
Source File: CsvBulkLoadTool.java    From phoenix with Apache License 2.0 6 votes vote down vote up
@Override
public int run(String[] args) throws Exception {

    Configuration conf = HBaseConfiguration.addHbaseResources(getConf());

    CommandLine cmdLine = null;
    try {
        cmdLine = parseOptions(args);
    } catch (IllegalStateException e) {
        printHelpAndExit(e.getMessage(), getOptions());
    }
    Class.forName(DriverManager.class.getName());
    Connection conn = DriverManager.getConnection(
            getJdbcUrl(cmdLine.getOptionValue(ZK_QUORUM_OPT.getOpt())));
    
    return loadData(conf, cmdLine, conn);
}
 
Example 3
Source File: IndexRebuildTask.java    From phoenix with Apache License 2.0 5 votes vote down vote up
@Override
public TaskRegionObserver.TaskResult checkCurrentResult(Task.TaskRecord taskRecord)
        throws Exception {

    String jobID = getJobID(taskRecord.getData());
    if (jobID != null) {
        Configuration conf = HBaseConfiguration.create(env.getConfiguration());
        Configuration configuration = HBaseConfiguration.addHbaseResources(conf);
        Cluster cluster = new Cluster(configuration);

        Job job = cluster.getJob(org.apache.hadoop.mapreduce.JobID.forName(jobID));
        if (job == null) {
            return new  TaskRegionObserver.TaskResult(TaskRegionObserver.TaskResultCode.SKIPPED, "");
        }
        if (job != null && job.isComplete()) {
            if (job.isSuccessful()) {
                LOGGER.warn("IndexRebuildTask checkCurrentResult job is successful "
                        + taskRecord.getTableName());
                return new TaskRegionObserver.TaskResult(TaskRegionObserver.TaskResultCode.SUCCESS, "");
            } else {
                return new TaskRegionObserver.TaskResult(TaskRegionObserver.TaskResultCode.FAIL,
                        "Index is DISABLED");
            }
        }

    }
    return null;
}
 
Example 4
Source File: IndexTool.java    From phoenix with Apache License 2.0 5 votes vote down vote up
@Override
public int run(String[] args) throws Exception {
    CommandLine cmdLine;
    try {
        cmdLine = parseOptions(args);
    } catch (IllegalStateException e) {
        printHelpAndExit(e.getMessage(), getOptions());
        return -1;
    }
    configuration = HBaseConfiguration.addHbaseResources(getConf());
    populateIndexToolAttributes(cmdLine);
    if (tenantId != null) {
        configuration.set(PhoenixRuntime.TENANT_ID_ATTRIB, tenantId);
    }
    try (Connection conn = getConnection(configuration)) {
        createIndexToolTables(conn);
        if (dataTable != null && indexTable != null) {
            setupIndexAndDataTable(conn);
            checkIfFeatureApplicable(startTime, endTime, lastVerifyTime, pDataTable, isLocalIndexBuild);
            if (shouldDeleteBeforeRebuild) {
                deleteBeforeRebuild(conn);
            }
        }
        preSplitIndexTable(cmdLine, conn);
        boolean result = submitIndexToolJob(conn, configuration);
        if (result) {
            return 0;
        } else {
            LOGGER.error("IndexTool job failed! Check logs for errors..");
            return -1;
        }
    } catch (Exception ex) {
        LOGGER.error("An exception occurred while performing the indexing job: "
            + ExceptionUtils.getMessage(ex) + " at:\n" + ExceptionUtils.getStackTrace(ex));
        return -1;
    }
}
 
Example 5
Source File: ExportHBaseTableToDelimiteredTxt.java    From HBase-ToHDFS with Apache License 2.0 4 votes vote down vote up
public static void main (String[] args) throws IOException, InterruptedException, ClassNotFoundException {
 if (args.length == 0) {
    System.out
        .println("ExportHBaseTableToDelimiteredTxt {tableName} {ColumnFamily} {outputPath} {shouldCompressWithGz} {schemaLocationOnHdfs} {delimiter} {rowKeyColumn.Optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String shouldCompression = args[3];
  String schemaFilePath = args[4];
  String delimiter = args[5];
  
  String rowKeyColumn = "";
  if (args.length > 6) {
    rowKeyColumn = args[6];
  }
  
  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  
  HBaseConfiguration.addHbaseResources(job.getConfiguration());
  
  job.getConfiguration().set(SHOULD_COMPRESSION_CONF, shouldCompression);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
  job.getConfiguration().set(OUTPUT_PATH_CONF, outputPath);
  job.getConfiguration().set(DELIMITER_CONF, delimiter);

  job.setJarByClass(ExportHBaseTableToDelimiteredTxt.class);
  job.setJobName("ExportHBaseTableToDelimiteredTxt ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(NullOutputFormat.class); // because we aren't
                                                    // emitting anything from
                                                    // mapper

  job.setNumReduceTasks(0);
  
  boolean b = job.waitForCompletion(true);
}
 
Example 6
Source File: ExportHBaseTableToParquet.java    From HBase-ToHDFS with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out
        .println("ExportHBaseTableToParquet {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowkey.column.optional");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];

  String rowKeyColumn = "";
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToParquet.class);
  job.setJobName("ExportHBaseTableToParquet ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  AvroParquetOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();

  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroParquetOutputFormat.setSchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
 
Example 7
Source File: PopulateTable.java    From HBase-ToHDFS with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws Exception {

    if (args.length == 0) {
      System.out.println("PopulateSmallTable {numberOfMappers} {numberOfRecords} {tmpOutputPath} {tableName} {columnFamily} {runID}");
      return;
    }

    String numberOfMappers = args[0];
    String numberOfRecords = args[1];
    String outputPath = args[2];
    String tableName = args[3];
    String columnFamily = args[4];
    String runID = args[5];

    // Create job
    Job job = Job.getInstance();
    HBaseConfiguration.addHbaseResources(job.getConfiguration());

    job.setJarByClass(PopulateTable.class);
    job.setJobName("PopulateTable: " + runID);
    job.getConfiguration().set(NUMBER_OF_RECORDS, numberOfRecords);

    job.getConfiguration().set(TABLE_NAME, tableName);
    job.getConfiguration().set(COLUMN_FAMILY, columnFamily);
    job.getConfiguration().set(RUN_ID, runID);

    // Define input format and path
    job.setInputFormatClass(NMapInputFormat.class);
    NMapInputFormat.setNumMapTasks(job.getConfiguration(), Integer.parseInt(numberOfMappers));

    Configuration config = HBaseConfiguration.create();

    HTable hTable = new HTable(config, tableName);

    // Auto configure partitioner and reducer
    HFileOutputFormat.configureIncrementalLoad(job, hTable);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Define the mapper and reducer
    job.setMapperClass(CustomMapper.class);
    // job.setReducerClass(CustomReducer.class);

    // Define the key and value format
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    // Exit
    job.waitForCompletion(true);
    FileSystem hdfs = FileSystem.get(config);

    // Must all HBase to have write access to HFiles
    HFileUtils.changePermissionR(outputPath, hdfs);

    LoadIncrementalHFiles load = new LoadIncrementalHFiles(config);
    load.doBulkLoad(new Path(outputPath), hTable);

  }
 
Example 8
Source File: ExportHBaseTableToDelimiteredSeq.java    From HBase-ToHDFS with Apache License 2.0 4 votes vote down vote up
public static void main (String[] args) throws IOException, InterruptedException, ClassNotFoundException {
 if (args.length == 0) {
    System.out
        .println("ExportHBaseTableToDelimiteredSeq {tableName} {ColumnFamily} {outputPath} {compressionCodec} {schemaLocationOnLocal} {delimiter} {rowKeyColumn.optional");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String delimiter = args[5];

  String rowKeyColumn = "";
  if (args.length > 6) {
    rowKeyColumn = args[6];
  }
  
  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  
  HBaseConfiguration.addHbaseResources(job.getConfiguration());
  
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
  job.getConfiguration().set(OUTPUT_PATH_CONF, outputPath);
  job.getConfiguration().set(DELIMITER_CONF, delimiter);

  job.setJarByClass(ExportHBaseTableToDelimiteredSeq.class);
  job.setJobName("ExportHBaseTableToDelimiteredSeq ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(SequenceFileOutputFormat.class); 
  SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
  
  if (compressionCodec.equals("snappy")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    //nothing
  }
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);
  
  job.setNumReduceTasks(0);
  
  boolean b = job.waitForCompletion(true);
}
 
Example 9
Source File: ExportHBaseTableToAvro.java    From HBase-ToHDFS with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToAvro {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowKeyColumn.Optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String rowKeyColumn = "";
  
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToAvro.class);
  job.setJobName("ExportHBaseTableToAvro ");

  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
  
  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  AvroKeyOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();

  FileSystem fs = FileSystem.get(job.getConfiguration());

  AvroJob.setOutputKeySchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
 
Example 10
Source File: CreateTable.java    From HBase-ToHDFS with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws Exception{
  
  if (args.length == 0) {
    System.out.println("CreateTables {tableName} {columnFamilyName} {RegionCount}");
    return;
  }
  
  String tableName = args[0];
  String columnFamilyName = args[1];
  String regionCount = args[2];
  
  long regionMaxSize = 107374182400l;
  
  Configuration config = HBaseConfiguration.addHbaseResources(new Configuration());
  
  HBaseAdmin admin = new HBaseAdmin(config);
  
  createTable(tableName, columnFamilyName, Short.parseShort(regionCount), regionMaxSize, admin);
  
  admin.close();
  System.out.println("Done");
}
 
Example 11
Source File: HalyardStats.java    From Halyard with Apache License 2.0 4 votes vote down vote up
@Override
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    String targetGraph = cmd.getOptionValue('g');
    String graphContext = cmd.getOptionValue('c');
    String thresh = cmd.getOptionValue('r');
    TableMapReduceUtil.addDependencyJars(getConf(),
        HalyardExport.class,
        NTriplesUtil.class,
        Rio.class,
        AbstractRDFHandler.class,
        RDFFormat.class,
        RDFParser.class,
        HTable.class,
        HBaseConfiguration.class,
        AuthenticationProtos.class,
        Trace.class,
        Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardStats " + source + (target == null ? " update" : " -> " + target));
    job.getConfiguration().set(SOURCE, source);
    if (target != null) job.getConfiguration().set(TARGET, target);
    if (targetGraph != null) job.getConfiguration().set(TARGET_GRAPH, targetGraph);
    if (graphContext != null) job.getConfiguration().set(GRAPH_CONTEXT, graphContext);
    if (thresh != null) job.getConfiguration().setLong(THRESHOLD, Long.parseLong(thresh));
    job.setJarByClass(HalyardStats.class);
    TableMapReduceUtil.initCredentials(job);

    Scan scan = HalyardTableUtils.scan(null, null);
    if (graphContext != null) { //restricting stats to scan given graph context only
        List<RowRange> ranges = new ArrayList<>();
        byte[] gcHash = HalyardTableUtils.hashKey(SimpleValueFactory.getInstance().createIRI(graphContext));
        ranges.add(rowRange(HalyardTableUtils.CSPO_PREFIX, gcHash));
        ranges.add(rowRange(HalyardTableUtils.CPOS_PREFIX, gcHash));
        ranges.add(rowRange(HalyardTableUtils.COSP_PREFIX, gcHash));
        if (target == null) { //add stats context to the scanned row ranges (when in update mode) to delete the related stats during MapReduce
            ranges.add(rowRange(HalyardTableUtils.CSPO_PREFIX, HalyardTableUtils.hashKey(targetGraph == null ? HALYARD.STATS_GRAPH_CONTEXT : SimpleValueFactory.getInstance().createIRI(targetGraph))));
        }
        scan.setFilter(new MultiRowRangeFilter(ranges));
    }
    TableMapReduceUtil.initTableMapperJob(
        source,
        scan,
        StatsMapper.class,
        ImmutableBytesWritable.class,
        LongWritable.class,
        job);
    job.setPartitionerClass(StatsPartitioner.class);
    job.setReducerClass(StatsReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("Stats Generation Completed..");
        return 0;
    }
    return -1;
}
 
Example 12
Source File: HalyardBulkUpdate.java    From Halyard with Apache License 2.0 4 votes vote down vote up
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String queryFiles = cmd.getOptionValue('q');
    String workdir = cmd.getOptionValue('w');
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, Long.parseLong(cmd.getOptionValue('e', String.valueOf(System.currentTimeMillis()))));
    if (cmd.hasOption('i')) getConf().set(ELASTIC_INDEX_URL, cmd.getOptionValue('i'));
    TableMapReduceUtil.addDependencyJars(getConf(),
           HalyardExport.class,
           NTriplesUtil.class,
           Rio.class,
           AbstractRDFHandler.class,
           RDFFormat.class,
           RDFParser.class,
           HTable.class,
           HBaseConfiguration.class,
           AuthenticationProtos.class,
           Trace.class,
           Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setStrings(TABLE_NAME_PROPERTY, source);
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    int stages = 1;
    for (int stage = 0; stage < stages; stage++) {
        Job job = Job.getInstance(getConf(), "HalyardBulkUpdate -> " + workdir + " -> " + source + " stage #" + stage);
        job.getConfiguration().setInt(STAGE_PROPERTY, stage);
        job.setJarByClass(HalyardBulkUpdate.class);
        job.setMapperClass(SPARQLUpdateMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);
        job.setInputFormatClass(QueryInputFormat.class);
        job.setSpeculativeExecution(false);
        job.setReduceSpeculativeExecution(false);
        try (HTable hTable = HalyardTableUtils.getTable(getConf(), source, false, 0)) {
            HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
            QueryInputFormat.setQueriesFromDirRecursive(job.getConfiguration(), queryFiles, true, stage);
            Path outPath = new Path(workdir, "stage"+stage);
            FileOutputFormat.setOutputPath(job, outPath);
            TableMapReduceUtil.addDependencyJars(job);
            TableMapReduceUtil.initCredentials(job);
            if (stage == 0) { //count real number of stages
                for (InputSplit is : new QueryInputFormat().getSplits(job)) {
                    QueryInputFormat.QueryInputSplit qis = (QueryInputFormat.QueryInputSplit)is;
                    int updates = QueryParserUtil.parseUpdate(QueryLanguage.SPARQL, qis.getQuery(), null).getUpdateExprs().size();
                    if (updates > stages) {
                        stages = updates;
                    }
                    LOG.log(Level.INFO, "{0} contains {1} stages of the update sequence.", new Object[]{qis.getQueryName(), updates});
                }
                LOG.log(Level.INFO, "Bulk Update will process {0} MapReduce stages.", stages);
            }
            if (job.waitForCompletion(true)) {
                new LoadIncrementalHFiles(getConf()).doBulkLoad(outPath, hTable);
                LOG.log(Level.INFO, "Stage #{0} of {1} completed..", new Object[]{stage, stages});
            } else {
                return -1;
            }
        }
    }
    LOG.info("Bulk Update Completed..");
    return 0;
}
 
Example 13
Source File: HalyardBulkLoad.java    From Halyard with Apache License 2.0 4 votes vote down vote up
@Override
protected int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String workdir = cmd.getOptionValue('w');
    String target = cmd.getOptionValue('t');
    getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
    getConf().setBoolean(VERIFY_DATATYPE_VALUES_PROPERTY, cmd.hasOption('d'));
    getConf().setBoolean(TRUNCATE_PROPERTY, cmd.hasOption('r'));
    getConf().setInt(SPLIT_BITS_PROPERTY, Integer.parseInt(cmd.getOptionValue('b', "3")));
    if (cmd.hasOption('g')) getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
    getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, Long.parseLong(cmd.getOptionValue('e', String.valueOf(System.currentTimeMillis()))));
    if (cmd.hasOption('m')) getConf().setLong("mapreduce.input.fileinputformat.split.maxsize", Long.parseLong(cmd.getOptionValue('m')));
    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + workdir + " -> " + target);
    job.setJarByClass(HalyardBulkLoad.class);
    job.setMapperClass(RDFMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), target, true, getConf().getInt(SPLIT_BITS_PROPERTY, 3))) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, source);
        FileOutputFormat.setOutputPath(job, new Path(workdir));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            if (getConf().getBoolean(TRUNCATE_PROPERTY, false)) {
                HalyardTableUtils.truncateTable(hTable).close();
            }
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(workdir), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}
 
Example 14
Source File: HalyardPreSplit.java    From Halyard with Apache License 2.0 4 votes vote down vote up
@Override
protected int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    try (Connection con = ConnectionFactory.createConnection(getConf())) {
        try (Admin admin = con.getAdmin()) {
            if (admin.tableExists(TableName.valueOf(target))) {
                LOG.log(Level.WARNING, "Pre-split cannot modify already existing table {0}", target);
                return -1;
            }
        }
    }
    getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
    if (cmd.hasOption('g')) getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
    getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    getConf().setInt(DECIMATION_FACTOR_PROPERTY, Integer.parseInt(cmd.getOptionValue('d', String.valueOf(DEFAULT_DECIMATION_FACTOR))));
    getConf().setLong(SPLIT_LIMIT_PROPERTY, Long.parseLong(cmd.getOptionValue('l', String.valueOf(DEFAULT_SPLIT_LIMIT))));
    Job job = Job.getInstance(getConf(), "HalyardPreSplit -> " + target);
     job.getConfiguration().set(TABLE_PROPERTY, target);
    job.setJarByClass(HalyardPreSplit.class);
    job.setMapperClass(RDFDecimatingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, source);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    job.setReducerClass(PreSplitReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("PreSplit Calculation Completed..");
        return 0;
    }
    return -1;
}
 
Example 15
Source File: HalyardSummary.java    From Halyard with Apache License 2.0 4 votes vote down vote up
@Override
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    TableMapReduceUtil.addDependencyJars(getConf(),
           HalyardExport.class,
           Rio.class,
           AbstractRDFHandler.class,
           RDFFormat.class,
           RDFParser.class,
           HTable.class,
           HBaseConfiguration.class,
           AuthenticationProtos.class,
           Trace.class,
           Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardSummary " + source + (target == null ? " update" : " -> " + target));
    job.getConfiguration().set(SOURCE, source);
    if (target != null) job.getConfiguration().set(TARGET, target);
    if (cmd.hasOption('g')) job.getConfiguration().set(TARGET_GRAPH, cmd.getOptionValue('g'));
    if (cmd.hasOption('d')) job.getConfiguration().setInt(DECIMATION_FACTOR, Integer.parseInt(cmd.getOptionValue('d')));
    job.setJarByClass(HalyardSummary.class);
    TableMapReduceUtil.initCredentials(job);

    Scan scan = HalyardTableUtils.scan(new byte[]{HalyardTableUtils.POS_PREFIX}, new byte[]{HalyardTableUtils.POS_PREFIX + 1});

    TableMapReduceUtil.initTableMapperJob(source,
            scan,
            SummaryMapper.class,
            ImmutableBytesWritable.class,
            LongWritable.class,
            job);
    job.setNumReduceTasks(1);
    job.setCombinerClass(SummaryCombiner.class);
    job.setReducerClass(SummaryReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("Summary Generation Completed..");
        return 0;
    }
    return -1;
}
 
Example 16
Source File: HalyardBulkDelete.java    From Halyard with Apache License 2.0 4 votes vote down vote up
@Override
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('t');
    TableMapReduceUtil.addDependencyJars(getConf(),
        HalyardExport.class,
        NTriplesUtil.class,
        Rio.class,
        AbstractRDFHandler.class,
        RDFFormat.class,
        RDFParser.class,
        HTable.class,
        HBaseConfiguration.class,
        AuthenticationProtos.class,
        Trace.class,
        Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardDelete " + source);
    if (cmd.hasOption('s')) {
        job.getConfiguration().set(SUBJECT, cmd.getOptionValue('s'));
    }
    if (cmd.hasOption('p')) {
        job.getConfiguration().set(PREDICATE, cmd.getOptionValue('p'));
    }
    if (cmd.hasOption('o')) {
        job.getConfiguration().set(OBJECT, cmd.getOptionValue('o'));
    }
    if (cmd.hasOption('g')) {
        job.getConfiguration().setStrings(CONTEXTS, cmd.getOptionValues('g'));
    }
    job.setJarByClass(HalyardBulkDelete.class);
    TableMapReduceUtil.initCredentials(job);

    Scan scan = HalyardTableUtils.scan(null, null);

    TableMapReduceUtil.initTableMapperJob(source,
        scan,
        DeleteMapper.class,
        ImmutableBytesWritable.class,
        LongWritable.class,
        job);

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setSpeculativeExecution(false);
    job.setMapSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), source, false, 0)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileOutputFormat.setOutputPath(job, new Path(cmd.getOptionValue('f')));
        TableMapReduceUtil.addDependencyJars(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(cmd.getOptionValue('f')), hTable);
            LOG.info("Bulk Delete Completed..");
            return 0;
        }
    }
    return -1;
}
 
Example 17
Source File: HalyardBulkExport.java    From Halyard with Apache License 2.0 4 votes vote down vote up
@Override
protected int run(CommandLine cmd) throws Exception {
    if (!cmd.getArgList().isEmpty()) throw new HalyardExport.ExportException("Unknown arguments: " + cmd.getArgList().toString());
    String source = cmd.getOptionValue('s');
    String queryFiles = cmd.getOptionValue('q');
    String target = cmd.getOptionValue('t');
    if (!target.contains("{0}")) {
        throw new HalyardExport.ExportException("Bulk export target must contain '{0}' to be replaced by stripped filename of the actual SPARQL query.");
    }
    getConf().set(SOURCE, source);
    getConf().set(TARGET, target);
    String driver = cmd.getOptionValue('c');
    if (driver != null) {
        getConf().set(JDBC_DRIVER, driver);
    }
    String props[] = cmd.getOptionValues('p');
    if (props != null) {
        for (int i=0; i<props.length; i++) {
            props[i] = Base64.encodeBase64String(props[i].getBytes(StandardCharsets.UTF_8));
        }
        getConf().setStrings(JDBC_PROPERTIES, props);
    }
    if (cmd.hasOption('i')) getConf().set(HalyardBulkUpdate.ELASTIC_INDEX_URL, cmd.getOptionValue('i'));
    TableMapReduceUtil.addDependencyJars(getConf(),
           HalyardExport.class,
           NTriplesUtil.class,
           Rio.class,
           AbstractRDFHandler.class,
           RDFFormat.class,
           RDFParser.class,
           HTable.class,
           HBaseConfiguration.class,
           AuthenticationProtos.class,
           Trace.class,
           Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    String cp = cmd.getOptionValue('l');
    if (cp != null) {
        String jars[] = cp.split(":");
        StringBuilder newCp = new StringBuilder();
        for (int i=0; i<jars.length; i++) {
            if (i > 0) newCp.append(':');
            newCp.append(addTmpFile(jars[i])); //append clappspath entris to tmpfiles and trim paths from the classpath
        }
        getConf().set(JDBC_CLASSPATH, newCp.toString());
    }
    Job job = Job.getInstance(getConf(), "HalyardBulkExport " + source + " -> " + target);
    job.setJarByClass(HalyardBulkExport.class);
    job.setMaxMapAttempts(1);
    job.setMapperClass(BulkExportMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Void.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(QueryInputFormat.class);
    QueryInputFormat.setQueriesFromDirRecursive(job.getConfiguration(), queryFiles, false, 0);
    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initCredentials(job);
    if (job.waitForCompletion(true)) {
        LOG.info("Bulk Export Completed..");
        return 0;
    }
    return -1;
}