Java Code Examples for org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#initTableMapperJob()
The following examples show how to use
org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#initTableMapperJob().
You can go to the original project or source file by following the links above each example.
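Before the project-specific examples, here is a minimal sketch of the most common overload, initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job). The class names and table name used here (MinimalTableMapperJob, MyTableMapper, "my_table") are placeholders for illustration only and do not come from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class MinimalTableMapperJob {

    // Hypothetical pass-through mapper: emits each row key and its Result unchanged.
    static class MyTableMapper extends TableMapper<ImmutableBytesWritable, Result> {
        @Override
        protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
                throws IOException, InterruptedException {
            context.write(rowKey, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "minimal-table-mapper");
        job.setJarByClass(MinimalTableMapperJob.class);

        Scan scan = new Scan();
        scan.setCaching(500);        // larger caching helps full-table MapReduce scans
        scan.setCacheBlocks(false);  // don't pollute the block cache from an MR job

        // "my_table" is a placeholder table name.
        TableMapReduceUtil.initTableMapperJob(
                "my_table",                    // input HBase table
                scan,                          // Scan controlling which families/columns are read
                MyTableMapper.class,           // mapper class
                ImmutableBytesWritable.class,  // mapper output key class
                Result.class,                  // mapper output value class
                job);

        job.setNumReduceTasks(0);              // map-only job
        job.setOutputFormatClass(NullOutputFormat.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Most of the examples below follow this same pattern, differing mainly in how the Scan is configured, which mapper is used, and whether a reducer or a different output format is attached.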
Example 1
Source File: TransformerBaseRunner.java From BigDataArchitect with Apache License 2.0
/**
 * Create the job.
 *
 * @param conf
 * @return
 * @throws IOException
 */
protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Local run
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass,
            this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    // Cluster run: local submission and packaged (jar) submission
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    //         this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    //         job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}
Example 2
Source File: TransformBaseRunner.java From BigDataPlatform with GNU General Public License v3.0
protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Local run
    // TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass,
    //         this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass,
            this.mapOutputKeyClass, this.mapOutputValueClass, job, true);
    // Cluster run: local submission and packaged (jar) submission
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    //         this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    //         job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}
Example 3
Source File: JobFileProcessor.java From hraven with Apache License 2.0
/**
 * @param conf to use to create and run the job
 * @param scan to be used to scan the raw table.
 * @param totalJobCount the total number of jobs that need to be run in this
 *          batch. Used in job name.
 * @return The job to be submitted to the cluster.
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private Job getProcessingJob(Configuration conf, Scan scan, int totalJobCount)
    throws IOException {

  Configuration confClone = new Configuration(conf);

  // Turn off speculative execution.
  // Note: must be BEFORE the job construction with the new mapreduce API.
  confClone.setBoolean("mapred.map.tasks.speculative.execution", false);

  // Set up job
  Job job = new Job(confClone, getJobName(totalJobCount));

  // This is a map-only class, skip reduce step
  job.setNumReduceTasks(0);
  job.setJarByClass(JobFileProcessor.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);

  TableMapReduceUtil.initTableMapperJob(Constants.HISTORY_RAW_TABLE, scan,
      JobFileTableMapper.class, JobFileTableMapper.getOutputKeyClass(),
      JobFileTableMapper.getOutputValueClass(), job);

  return job;
}
Example 4
Source File: TableInputFormatTest.java From hgraphdb with Apache License 2.0
private void runTestOnTable() throws InterruptedException, ClassNotFoundException {
    Job job = null;
    try {
        Configuration conf = graph.configuration().toHBaseConfiguration();
        job = Job.getInstance(conf, "test123");
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setNumReduceTasks(0);
        Scan scan = new Scan();
        scan.addColumn(FAMILY_NAME, COLUMN_NAME);
        scan.setTimeRange(MINSTAMP, MAXSTAMP);
        scan.setMaxVersions();
        TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(), scan,
                ProcessTimeRangeMapper.class, Text.class, Text.class, job, true,
                TableInputFormat.class);
        job.waitForCompletion(true);
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } finally {
        if (job != null) {
            FileUtil.fullyDelete(
                new File(job.getConfiguration().get("hadoop.tmp.dir")));
        }
    }
}
Example 5
Source File: LeastRecentlyUsedPruner.java From metron with Apache License 2.0
public static void setupHBaseJob(Job job, String sourceTable, String cf) throws IOException {
    Scan scan = new Scan();
    if (cf != null) {
        scan.addFamily(Bytes.toBytes(cf));
    }
    scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false);  // don't set to true for MR jobs
    // set other scan attrs

    TableMapReduceUtil.initTableMapperJob(
            sourceTable,         // input table
            scan,                // Scan instance to control CF and attribute selection
            PrunerMapper.class,  // mapper class
            null,                // mapper output key
            null,                // mapper output value
            job);
    TableMapReduceUtil.initTableReducerJob(
            sourceTable,         // output table
            null,                // reducer class
            job);
}
Example 6
Source File: CellCounter.java From cloud-bigtable-examples with Apache License 2.0
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
    throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  String reportSeparatorString = (args.length > 2) ? args[2] : ":";
  conf.set("ReportSeparator", reportSeparatorString);
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(CellCounter.class);
  Scan scan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(tableName, scan,
      CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setReducerClass(IntSumReducer.class);
  return job;
}
Example 7
Source File: IntegrationTestWithCellVisibilityLoadAndVerify.java From hbase with Apache License 2.0
private Job doVerify(Configuration conf, TableDescriptor tableDescriptor, String... auths)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + tableDescriptor.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  scan.setAuthorizations(new Authorizations(auths));
  TableMapReduceUtil.initTableMapperJob(tableDescriptor.getTableName().getNameAsString(),
      scan, VerifyMapper.class, NullWritable.class, NullWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  return job;
}
Example 8
Source File: ActiveUserRunner.java From BigDataArchitect with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Initialize the arguments
    this.processArgs(conf, args);

    // Create the job
    Job job = Job.getInstance(conf, "active_user");

    // Set job-related configuration parameters
    job.setJarByClass(ActiveUserRunner.class);
    // HBase input mapper parameters
    // 1. Local run
    TableMapReduceUtil.initTableMapperJob(this.initScans(job), ActiveUserMapper.class,
            StatsUserDimension.class, TimeOutputValue.class, job, false);
    // 2. Cluster run
    // TableMapReduceUtil.initTableMapperJob(null, ActiveUserMapper.class,
    //         StatsUserDimension.class, TimeOutputValue.class, job);

    // Set reducer-related parameters
    job.setReducerClass(ActiveUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);

    // Set output-related parameters
    job.setOutputFormatClass(TransformerOutputFormat.class);

    // Start time in milliseconds
    long startTime = System.currentTimeMillis();
    try {
        return job.waitForCompletion(true) ? 0 : -1;
    } finally {
        // End time in milliseconds
        long endTime = System.currentTimeMillis();
        logger.info("Job<" + job.getJobName() + "> succeeded: " + job.isSuccessful()
                + "; start time: " + startTime + "; end time: " + endTime
                + "; elapsed: " + (endTime - startTime) + "ms");
    }
}
Example 9
Source File: NewInstallUserRunner.java From BigDataArchitect with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Process the arguments
    this.processArgs(conf, args);

    Job job = Job.getInstance(conf, "new_install_user");

    job.setJarByClass(NewInstallUserRunner.class);
    // Local run
    TableMapReduceUtil.initTableMapperJob(
            initScans(job),
            NewInstallUserMapper.class,
            StatsUserDimension.class,
            TimeOutputValue.class,
            job,
            false);
    // Cluster run: local submission and packaged (jar) submission
    // TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class,
    //         StatsUserDimension.class, TimeOutputValue.class, job);

    job.setReducerClass(NewInstallUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);

    // job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TransformerOutputFormat.class);
    if (job.waitForCompletion(true)) {
        // Job succeeded; now compute the total users
        this.calculateTotalUsers(conf);
        return 0;
    } else {
        return -1;
    }
}
Example 10
Source File: ExportHBaseTableToAvro.java From HBase-ToHDFS with Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToAvro {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowKeyColumn.Optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String rowKeyColumn = "";

  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();
  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToAvro.class);
  job.setJobName("ExportHBaseTableToAvro ");

  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  Scan scan = new Scan();
  scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false);  // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan,            // Scan instance to control CF and attribute selection
      MyMapper.class,  // mapper
      null,            // mapper output key
      null,            // mapper output value
      job);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  AvroKeyOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();

  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroJob.setOutputKeySchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
Example 11
Source File: ImportMapReduce.java From hiped2 with Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.OutputFileOption.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path output = new Path(cli.getArgValueAsString(CliCommonOpts.OutputFileOption.OUTPUT));

  Configuration conf = super.getConf();

  Scan scan = new Scan();
  scan.addColumn(HBaseWriter.STOCK_DETAILS_COLUMN_FAMILY_AS_BYTES,
      HBaseWriter.STOCK_COLUMN_QUALIFIER_AS_BYTES);

  Job job = new Job(conf);
  job.setJarByClass(ImportMapReduce.class);

  TableMapReduceUtil.initTableMapperJob(
      HBaseWriter.STOCKS_TABLE_NAME,
      scan,
      Exporter.class,
      ImmutableBytesWritable.class,
      Put.class,
      job);

  job.setNumReduceTasks(0);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DoubleWritable.class);

  FileOutputFormat.setOutputPath(job, output);

  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
Example 12
Source File: ExportHBaseTableToParquet.java From HBase-ToHDFS with Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out
        .println("ExportHBaseTableToParquet {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowkey.column.optional");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String rowKeyColumn = "";

  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToParquet.class);
  job.setJobName("ExportHBaseTableToParquet ");

  Scan scan = new Scan();
  scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false);  // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan,            // Scan instance to control CF and attribute selection
      MyMapper.class,  // mapper
      null,            // mapper output key
      null,            // mapper output value
      job);
  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  AvroParquetOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();

  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroParquetOutputFormat.setSchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
Example 13
Source File: IntegrationTestBigLinkedList.java From hbase with Apache License 2.0
public int run(Path outputDir, int numReducers) throws Exception {
  LOG.info("Running Verify with outputDir=" + outputDir + ", numReducers=" + numReducers);

  job = Job.getInstance(getConf());

  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());

  setJobScannerConf(job);

  Scan scan = new Scan();
  scan.addColumn(FAMILY_NAME, COLUMN_PREV);
  scan.setCaching(10000);
  scan.setCacheBlocks(false);
  if (isMultiUnevenColumnFamilies(getConf())) {
    scan.addColumn(BIG_FAMILY_NAME, BIG_FAMILY_NAME);
    scan.addColumn(TINY_FAMILY_NAME, TINY_FAMILY_NAME);
  }

  TableMapReduceUtil.initTableMapperJob(getTableName(getConf()).getName(), scan,
      VerifyMapper.class, BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);

  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(SequenceFileAsBinaryOutputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  TextOutputFormat.setOutputPath(job, outputDir);

  boolean success = job.waitForCompletion(true);

  if (success) {
    Counters counters = job.getCounters();
    if (null == counters) {
      LOG.warn("Counters were null, cannot verify Job completion."
          + " This is commonly a result of insufficient YARN configuration.");
      // We don't have access to the counters to know if we have "bad" counts
      return 0;
    }

    // If we find no unexpected values, the job didn't outright fail
    if (verifyUnexpectedValues(counters)) {
      // We didn't check referenced+unreferenced counts, leave that to visual inspection
      return 0;
    }
  }

  // We failed
  return 1;
}
Example 14
Source File: ExportHBaseTableToDelimiteredSeq.java From HBase-ToHDFS with Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out
        .println("ExportHBaseTableToDelimiteredSeq {tableName} {ColumnFamily} {outputPath} {compressionCodec} {schemaLocationOnLocal} {delimiter} {rowKeyColumn.optional");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String delimiter = args[5];

  String rowKeyColumn = "";
  if (args.length > 6) {
    rowKeyColumn = args[6];
  }

  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  HBaseConfiguration.addHbaseResources(job.getConfiguration());
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
  job.getConfiguration().set(OUTPUT_PATH_CONF, outputPath);
  job.getConfiguration().set(DELIMITER_CONF, delimiter);

  job.setJarByClass(ExportHBaseTableToDelimiteredSeq.class);
  job.setJobName("ExportHBaseTableToDelimiteredSeq ");

  Scan scan = new Scan();
  scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false);  // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan,            // Scan instance to control CF and attribute selection
      MyMapper.class,  // mapper
      null,            // mapper output key
      null,            // mapper output value
      job);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));

  if (compressionCodec.equals("snappy")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
Example 15
Source File: ScanPerformanceEvaluation.java From hbase with Apache License 2.0
public void testScanMapReduce() throws IOException, InterruptedException, ClassNotFoundException {
  Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
  Stopwatch scanTimer = Stopwatch.createUnstarted();

  Scan scan = getScan();

  String jobName = "testScanMapReduce";

  Job job = new Job(conf);
  job.setJobName(jobName);

  job.setJarByClass(getClass());

  TableMapReduceUtil.initTableMapperJob(
      this.tablename,
      scan,
      MyMapper.class,
      NullWritable.class,
      NullWritable.class,
      job
  );

  job.setNumReduceTasks(0);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setOutputFormatClass(NullOutputFormat.class);

  scanTimer.start();
  job.waitForCompletion(true);
  scanTimer.stop();

  Counters counters = job.getCounters();
  long numRows = counters.findCounter(ScanCounter.NUM_ROWS).getValue();
  long numCells = counters.findCounter(ScanCounter.NUM_CELLS).getValue();

  long totalBytes = counters.findCounter(HBASE_COUNTER_GROUP_NAME, "BYTES_IN_RESULTS").getValue();
  double throughput = (double) totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
  double throughputRows = (double) numRows / scanTimer.elapsed(TimeUnit.SECONDS);
  double throughputCells = (double) numCells / scanTimer.elapsed(TimeUnit.SECONDS);

  System.out.println("HBase scan mapreduce: ");
  System.out.println("total time to open scanner: " + scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
  System.out.println("total time to scan: " + scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");

  System.out.println("total bytes: " + totalBytes + " bytes ("
      + StringUtils.humanReadableInt(totalBytes) + ")");
  System.out.println("throughput : " + StringUtils.humanReadableInt((long) throughput) + "B/s");
  System.out.println("total rows : " + numRows);
  System.out.println("throughput : " + StringUtils.humanReadableInt((long) throughputRows) + " rows/s");
  System.out.println("total cells : " + numCells);
  System.out.println("throughput : " + StringUtils.humanReadableInt((long) throughputCells) + " cells/s");
}
Example 16
Source File: MobRefReporter.java From hbase with Apache License 2.0
/**
 * Main method for the tool.
 * @return 0 if success, 1 for bad args. 2 if job aborted with an exception,
 *         3 if mr job was unsuccessful
 */
public int run(String[] args) throws IOException, InterruptedException {
  // TODO make family and table optional
  if (args.length != 3) {
    printUsage();
    return 1;
  }
  final String output = args[0];
  final String tableName = args[1];
  final String familyName = args[2];
  final long reportStartTime = EnvironmentEdgeManager.currentTime();
  Configuration conf = getConf();
  try {
    FileSystem fs = FileSystem.get(conf);
    // check whether the current user is the same one with the owner of hbase root
    String currentUserName = UserGroupInformation.getCurrentUser().getShortUserName();
    FileStatus[] hbaseRootFileStat = fs.listStatus(new Path(conf.get(HConstants.HBASE_DIR)));
    if (hbaseRootFileStat.length > 0) {
      String owner = hbaseRootFileStat[0].getOwner();
      if (!owner.equals(currentUserName)) {
        String errorMsg = "The current user[" + currentUserName
            + "] does not have hbase root credentials."
            + " If this job fails due to an inability to read HBase's internal directories, "
            + "you will need to rerun as a user with sufficient permissions. The HBase superuser "
            + "is a safe choice.";
        LOG.warn(errorMsg);
      }
    } else {
      LOG.error("The passed configs point to an HBase dir does not exist: {}",
          conf.get(HConstants.HBASE_DIR));
      throw new IOException("The target HBase does not exist");
    }

    byte[] family;
    int maxVersions;
    TableName tn = TableName.valueOf(tableName);
    try (Connection connection = ConnectionFactory.createConnection(conf);
        Admin admin = connection.getAdmin()) {
      TableDescriptor htd = admin.getDescriptor(tn);
      ColumnFamilyDescriptor hcd = htd.getColumnFamily(Bytes.toBytes(familyName));
      if (hcd == null || !hcd.isMobEnabled()) {
        throw new IOException("Column family " + familyName + " is not a MOB column family");
      }
      family = hcd.getName();
      maxVersions = hcd.getMaxVersions();
    }

    String id = getClass().getSimpleName() + UUID.randomUUID().toString().replace("-", "");
    Job job = null;
    Scan scan = new Scan();
    scan.addFamily(family);
    // Do not retrieve the mob data when scanning
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    scan.setAttribute(MobConstants.MOB_SCAN_REF_ONLY, Bytes.toBytes(Boolean.TRUE));
    // If a scanner caching value isn't set, pick a smaller default since we know we're doing
    // a full table scan and don't want to impact other clients badly.
    scan.setCaching(conf.getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 10000));
    scan.setCacheBlocks(false);
    scan.readVersions(maxVersions);
    conf.set(REPORT_JOB_ID, id);

    job = Job.getInstance(conf);
    job.setJarByClass(getClass());
    TableMapReduceUtil.initTableMapperJob(tn, scan, MobRefMapper.class, Text.class,
        ImmutableBytesWritable.class, job);

    job.setReducerClass(MobRefReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(output));

    job.setJobName(getClass().getSimpleName() + "-" + tn + "-" + familyName);
    // for use in the reducer. easier than re-parsing it out of the scan string.
    job.getConfiguration().set(TableInputFormat.SCAN_COLUMN_FAMILY, familyName);

    // Use when we start this job as the base point for file "recency".
    job.getConfiguration().setLong(REPORT_START_DATETIME, reportStartTime);

    if (job.waitForCompletion(true)) {
      LOG.info("Finished creating report for '{}', family='{}'", tn, familyName);
    } else {
      System.err.println("Job was not successful");
      return 3;
    }
    return 0;
  } catch (ClassNotFoundException | RuntimeException | IOException | InterruptedException e) {
    System.err.println("Job aborted due to exception " + e);
    return 2; // job failed
  }
}
Example 17
Source File: TwoLevelIndexBuilder.java From yuzhouwan with Apache License 2.0
public static void main(String[] args) throws Exception {
    String rootDir = "hdfs://hadoop1:8020/hbase";
    String zkServer = "hadoop1";
    String port = "2181";

    TwoLevelIndexBuilder conn = new TwoLevelIndexBuilder(rootDir, zkServer, port);

    Configuration conf = conn.conf;
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // TwoLevelIndexBuilder: TableName, ColumnFamily, Qualifier
    if (otherArgs.length < 3) {
        System.exit(-1);
    }
    // Table name
    String tableName = otherArgs[0];
    // Column family
    String columnFamily = otherArgs[1];
    conf.set("tableName", tableName);
    conf.set("columnFamily", columnFamily);

    // Columns (there may be more than one)
    String[] qualifiers = new String[otherArgs.length - 2];
    System.arraycopy(otherArgs, 2, qualifiers, 0, qualifiers.length);
    // Set the columns
    conf.setStrings("qualifiers", qualifiers);

    Job job = new Job(conf, tableName);
    job.setJarByClass(TwoLevelIndexBuilder.class);
    job.setMapperClass(TowLevelIndexMapper.class);
    job.setNumReduceTasks(0);    // no reduce phase is needed
    job.setInputFormatClass(TableInputFormat.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);

    TableMapReduceUtil.initTableMapperJob(tableName, new Scan(), TowLevelIndexMapper.class,
            ImmutableBytesWritable.class, Put.class, job);
    job.waitForCompletion(true);
}
Example 18
Source File: HalyardSummary.java From Halyard with Apache License 2.0
@Override
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    TableMapReduceUtil.addDependencyJars(getConf(),
            HalyardExport.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class,
            HTable.class,
            HBaseConfiguration.class,
            AuthenticationProtos.class,
            Trace.class,
            Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardSummary " + source + (target == null ? " update" : " -> " + target));
    job.getConfiguration().set(SOURCE, source);
    if (target != null) job.getConfiguration().set(TARGET, target);
    if (cmd.hasOption('g')) job.getConfiguration().set(TARGET_GRAPH, cmd.getOptionValue('g'));
    if (cmd.hasOption('d')) job.getConfiguration().setInt(DECIMATION_FACTOR, Integer.parseInt(cmd.getOptionValue('d')));
    job.setJarByClass(HalyardSummary.class);
    TableMapReduceUtil.initCredentials(job);

    Scan scan = HalyardTableUtils.scan(new byte[]{HalyardTableUtils.POS_PREFIX}, new byte[]{HalyardTableUtils.POS_PREFIX + 1});

    TableMapReduceUtil.initTableMapperJob(source, scan, SummaryMapper.class,
            ImmutableBytesWritable.class, LongWritable.class, job);
    job.setNumReduceTasks(1);
    job.setCombinerClass(SummaryCombiner.class);
    job.setReducerClass(SummaryReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("Summary Generation Completed..");
        return 0;
    }
    return -1;
}
Example 19
Source File: HalyardBulkDelete.java From Halyard with Apache License 2.0
@Override
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('t');
    TableMapReduceUtil.addDependencyJars(getConf(),
            HalyardExport.class,
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class,
            HTable.class,
            HBaseConfiguration.class,
            AuthenticationProtos.class,
            Trace.class,
            Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardDelete " + source);
    if (cmd.hasOption('s')) {
        job.getConfiguration().set(SUBJECT, cmd.getOptionValue('s'));
    }
    if (cmd.hasOption('p')) {
        job.getConfiguration().set(PREDICATE, cmd.getOptionValue('p'));
    }
    if (cmd.hasOption('o')) {
        job.getConfiguration().set(OBJECT, cmd.getOptionValue('o'));
    }
    if (cmd.hasOption('g')) {
        job.getConfiguration().setStrings(CONTEXTS, cmd.getOptionValues('g'));
    }
    job.setJarByClass(HalyardBulkDelete.class);
    TableMapReduceUtil.initCredentials(job);

    Scan scan = HalyardTableUtils.scan(null, null);

    TableMapReduceUtil.initTableMapperJob(source, scan, DeleteMapper.class,
            ImmutableBytesWritable.class, LongWritable.class, job);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setSpeculativeExecution(false);
    job.setMapSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), source, false, 0)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileOutputFormat.setOutputPath(job, new Path(cmd.getOptionValue('f')));
        TableMapReduceUtil.addDependencyJars(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(cmd.getOptionValue('f')), hTable);
            LOG.info("Bulk Delete Completed..");
            return 0;
        }
    }
    return -1;
}
Example 20
Source File: HalyardStats.java From Halyard with Apache License 2.0
@Override
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    String targetGraph = cmd.getOptionValue('g');
    String graphContext = cmd.getOptionValue('c');
    String thresh = cmd.getOptionValue('r');
    TableMapReduceUtil.addDependencyJars(getConf(),
            HalyardExport.class,
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class,
            HTable.class,
            HBaseConfiguration.class,
            AuthenticationProtos.class,
            Trace.class,
            Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardStats " + source + (target == null ? " update" : " -> " + target));
    job.getConfiguration().set(SOURCE, source);
    if (target != null) job.getConfiguration().set(TARGET, target);
    if (targetGraph != null) job.getConfiguration().set(TARGET_GRAPH, targetGraph);
    if (graphContext != null) job.getConfiguration().set(GRAPH_CONTEXT, graphContext);
    if (thresh != null) job.getConfiguration().setLong(THRESHOLD, Long.parseLong(thresh));
    job.setJarByClass(HalyardStats.class);
    TableMapReduceUtil.initCredentials(job);

    Scan scan = HalyardTableUtils.scan(null, null);
    if (graphContext != null) { // restricting stats to scan given graph context only
        List<RowRange> ranges = new ArrayList<>();
        byte[] gcHash = HalyardTableUtils.hashKey(SimpleValueFactory.getInstance().createIRI(graphContext));
        ranges.add(rowRange(HalyardTableUtils.CSPO_PREFIX, gcHash));
        ranges.add(rowRange(HalyardTableUtils.CPOS_PREFIX, gcHash));
        ranges.add(rowRange(HalyardTableUtils.COSP_PREFIX, gcHash));
        if (target == null) {
            // add stats context to the scanned row ranges (when in update mode)
            // to delete the related stats during MapReduce
            ranges.add(rowRange(HalyardTableUtils.CSPO_PREFIX,
                    HalyardTableUtils.hashKey(targetGraph == null
                            ? HALYARD.STATS_GRAPH_CONTEXT
                            : SimpleValueFactory.getInstance().createIRI(targetGraph))));
        }
        scan.setFilter(new MultiRowRangeFilter(ranges));
    }
    TableMapReduceUtil.initTableMapperJob(
            source,
            scan,
            StatsMapper.class,
            ImmutableBytesWritable.class,
            LongWritable.class,
            job);
    job.setPartitionerClass(StatsPartitioner.class);
    job.setReducerClass(StatsReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("Stats Generation Completed..");
        return 0;
    }
    return -1;
}