Java Code Examples for org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#initTableReducerJob()

The following examples show how to use org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#initTableReducerJob(). Each example is taken from an open-source project; the originating source file and license are noted above the code.
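Before the individual examples, here is a minimal, hypothetical driver sketch (the table name "my_table" and the mapper placeholder are assumptions, not taken from any project below) showing the two overloads that recur throughout this page: the three-argument form, which also ships the HBase dependency jars with the job, and the eight-argument form, whose final addDependencyJars flag can be set to false for local runs.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;

public class InitTableReducerJobSketch {

  public static Job buildJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(HBaseConfiguration.create(conf), "example");
    job.setJarByClass(InitTableReducerJobSketch.class);
    // The mapper is a placeholder; it must emit the key/value types declared below.
    // job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);

    // Simple form: write to "my_table", no reducer class (map-output Puts go
    // straight to the table), and add the HBase dependency jars to the job.
    TableMapReduceUtil.initTableReducerJob("my_table", null, job);

    // Extended form: the trailing boolean (addDependencyJars) can be false when
    // running locally with the HBase jars already on the classpath.
    // TableMapReduceUtil.initTableReducerJob("my_table", null, job,
    //     null, null, null, null, false);

    job.setNumReduceTasks(0); // map-only: TableOutputFormat writes the Puts
    return job;
  }
}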
Example 1
Source File: AnalyserLogDataRunner.java    From BigDataPlatform with GNU General Public License v3.0
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  this.processArgs(conf, args);

  Job job = Job.getInstance(conf, "analyser_logdata");
  job.setJarByClass(AnalyserLogDataRunner.class);
  job.setMapperClass(AnalyserLogDataMapper.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(Put.class);

  // Configure the reducer side.
  // 1. Running on the cluster as a packaged jar: addDependencyJars must be true (the default).
  //    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
  TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null,
      null, null, null, true);
  // 2. Running locally: addDependencyJars must be false.
  //    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job,
  //        null, null, null, null, false);

  job.setNumReduceTasks(0);
  // Set the input paths.
  this.setJobInputPaths(job);
  return job.waitForCompletion(true) ? 0 : -1;
}
 
Example 2
Source File: AnalyserLogDataRunner.java    From BigDataArchitect with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
	Configuration conf = this.getConf();
	this.processArgs(conf, args);

	Job job = Job.getInstance(conf, "analyser_logdata");


	job.setJarByClass(AnalyserLogDataRunner.class);
	job.setMapperClass(AnalyserLogDataMapper.class);
	job.setMapOutputKeyClass(NullWritable.class);
	job.setMapOutputValueClass(Put.class);
	TableMapReduceUtil.initTableReducerJob(
			EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null,
			null, null, false);
	job.setNumReduceTasks(0);

	// Set the input paths.
	this.setJobInputPaths(job);
	return job.waitForCompletion(true) ? 0 : -1;
}
 
Example 3
Source File: WCRunner.java    From BigDataArchitect with Apache License 2.0
public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(true);
        conf.set("hbase.zookeeper.quorum","node04,node02,node03");
        conf.set("mapreduce.app-submission.cross-platform","true");
        conf.set("mapreduce.framework.name","local");

        // Create the Job instance
        Job job = Job.getInstance(conf);
        job.setJarByClass(WCRunner.class);

        // Set the mapper class
        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Set the reducer class
//        job.setReducerClass();
//        TableMapReduceUtil.initTableMapperJob();
        TableMapReduceUtil.initTableReducerJob("wc", WCReducer.class, job, null, null, null, null, false);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Put.class);

        // HDFS directory holding the input data
        FileInputFormat.addInputPath(job,new Path("/wc/wc"));
        job.waitForCompletion(true);
    }
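Example 3 passes WCReducer.class to initTableReducerJob() but the reducer itself is not shown. Any class used this way must extend org.apache.hadoop.hbase.mapreduce.TableReducer and emit Put (or Delete) mutations. The following is only a plausible sketch of such a reducer, assuming the mapper from Example 3 emits Text/IntWritable pairs; the column family "cf" and qualifier "count" are invented for illustration and may not match the original WCReducer.

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

// Hypothetical sketch of a TableReducer matching Example 3's mapper output
// (Text key, IntWritable value). Column family "cf" and qualifier "count"
// are assumptions, not taken from the original project.
public class WCReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {

  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable value : values) {
      sum += value.get();
    }
    // Row key = the word; one cell holds the aggregated count.
    Put put = new Put(Bytes.toBytes(key.toString()));
    put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(sum)));
    context.write(new ImmutableBytesWritable(put.getRow()), put);
  }
}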
 
Example 4
Source File: LeastRecentlyUsedPruner.java    From metron with Apache License 2.0
public static void setupHBaseJob(Job job, String sourceTable, String cf) throws IOException {
        Scan scan = new Scan();
        if(cf != null) {
            scan.addFamily(Bytes.toBytes(cf));
        }
        scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false);  // don't set to true for MR jobs
        // set other scan attrs

        TableMapReduceUtil.initTableMapperJob(
                sourceTable,      // input table
                scan,	          // Scan instance to control CF and attribute selection
                PrunerMapper.class,   // mapper class
                null,	          // mapper output key
                null,	          // mapper output value
                job);
        TableMapReduceUtil.initTableReducerJob(
                sourceTable,      // output table
                null,             // reducer class
                job);
    }
 
Example 5
Source File: UpdateClusterJob.java    From recsys-offline with Apache License 2.0
public void run() {

		try {
			Job job = Job.getInstance(HBaseContext.config, "UpdateClusterJob");
			job.setJarByClass(UpdateClusterJob.class);

			Scan scan = new Scan();
			scan.setCaching(500);
			scan.setCacheBlocks(false);
			TableMapReduceUtil.initTableMapperJob(
					Constants.hbase_cluster_model_table, scan,
					HBaseReadMapper.class, Text.class, Text.class, job);
			TableMapReduceUtil.initTableReducerJob(
					Constants.hbase_cluster_model_table,
					HBaseWriteReducer.class, job);
			job.setNumReduceTasks(4);

			boolean b = job.waitForCompletion(true);
			if (!b) {
				throw new IOException("error with job!");
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
 
Example 6
Source File: AnalyserLogDataRunner.java    From BigDataArchitect with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
	Configuration conf = this.getConf();
	this.processArgs(conf, args);

	Job job = Job.getInstance(conf, "analyser_logdata");

	// Code needed when submitting the job locally but running it on the cluster:
	// File jarFile = EJob.createTempJar("target/classes");
	// ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
	// End of local-submit, cluster-run code.

	job.setJarByClass(AnalyserLogDataRunner.class);
	job.setMapperClass(AnalyserLogDataMapper.class);
	job.setMapOutputKeyClass(NullWritable.class);
	job.setMapOutputValueClass(Put.class);
	// Configure the reducer side.
	// 1. Running on the cluster as a packaged jar: addDependencyJars must be true (the default).
	// TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS,
	// null, job);
	// 2. Running locally: addDependencyJars must be false.
	TableMapReduceUtil.initTableReducerJob(
			EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null,
			null, null, false);
	job.setNumReduceTasks(0);

	// 设置输入路径
	this.setJobInputPaths(job);
	return job.waitForCompletion(true) ? 0 : -1;
}
 
Example 7
Source File: AnalyserLogDataRunner.java    From BigDataArchitect with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);

    Job job = Job.getInstance(conf, "analyser_logdata");

    // Code needed when submitting the job locally but running it on the cluster:
    // File jarFile = EJob.createTempJar("target/classes");
    // ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
    // End of local-submit, cluster-run code.

    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);
    // Configure the reducer side.
    // 1. Running on the cluster as a packaged jar: addDependencyJars must be true (the default).
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
    // 2. Running locally: addDependencyJars must be false.
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, false);
    job.setNumReduceTasks(0);

    // Set the input paths.
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}
 
Example 8
Source File: WordCountHBase.java    From cloud-bigtable-examples with Apache License 2.0
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: wordcount-hbase <in> [<in>...] <table-name>");
    System.exit(2);
  }

  Job job = Job.getInstance(conf, "word count");

  for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }

  TableName tableName = TableName.valueOf(otherArgs[otherArgs.length - 1]);
  try {
    CreateTable.createTable(tableName, conf,
        Collections.singletonList(Bytes.toString(COLUMN_FAMILY)));
  } catch (Exception e) {
    LOG.error("Could not create the table.", e);
  }

  job.setJarByClass(WordCountHBase.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setMapOutputValueClass(IntWritable.class);

  TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), MyTableReducer.class, job);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Example 9
Source File: PrepareClusterJob.java    From recsys-offline with Apache License 2.0
public void run() {
	
	try {
		Job job = Job.getInstance(HBaseContext.config, "ClusterPrepareJob");
		job.setJarByClass(PrepareClusterJob.class);

		Scan scan = new Scan();
		scan.setCaching(500);
		scan.setCacheBlocks(false);
		scan.addColumn(Constants.hbase_column_family.getBytes(),
				Constants.hbase_column_yearrate.getBytes());
		scan.addColumn(Constants.hbase_column_family.getBytes(),
				Constants.hbase_column_repaylimittime.getBytes());
		scan.addColumn(Constants.hbase_column_family.getBytes(),
				Constants.hbase_column_progress.getBytes());

		Filter filter = new SingleColumnValueFilter(Bytes.toBytes(Constants.hbase_column_family), 
				Bytes.toBytes(Constants.hbase_column_progress), CompareOp.NOT_EQUAL, Bytes.toBytes("100"));

		scan.setFilter(filter);

		TableMapReduceUtil.initTableMapperJob(Constants.hbase_p2p_table,
				scan, HBaseReadMapper.class, Text.class, Text.class, job);
		TableMapReduceUtil.initTableReducerJob(
				Constants.hbase_cluster_model_table,
				HBaseWriteReducer.class, job);
		job.setNumReduceTasks(1);

		boolean b = job.waitForCompletion(true);
		if (!b) {
			throw new IOException("error with job!");
		}
	} catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example 10
Source File: ImportTsv.java    From learning-hadoop with Apache License 2.0
/**
 * Sets up the actual job.
 * 
 * @param conf
 *            The current configuration.
 * @param args
 *            The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *             When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
		throws IOException, ClassNotFoundException {

	// Support non-XML supported characters
	// by re-encoding the passed separator as a Base64 string.
	String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
	if (actualSeparator != null) {
		conf.set(SEPARATOR_CONF_KEY,
				new String(Base64.encodeBytes(actualSeparator.getBytes())));
	}

	// See if a non-default Mapper was set
	String mapperClassName = conf.get(MAPPER_CONF_KEY);
	Class mapperClass = mapperClassName != null ? Class
			.forName(mapperClassName) : DEFAULT_MAPPER;

	String tableName = args[0];
	Path inputDir = new Path(args[1]);
	Job job = new Job(conf, NAME + "_" + tableName);
	job.setJarByClass(mapperClass);
	FileInputFormat.setInputPaths(job, inputDir);

	String inputCodec = conf.get(INPUT_LZO_KEY);
	if (inputCodec == null) {
		FileInputFormat.setMaxInputSplitSize(job, 67108864L); // max split size = 64m
		job.setInputFormatClass(TextInputFormat.class);
	} else {
		if (inputCodec.equalsIgnoreCase("lzo"))
			job.setInputFormatClass(LzoTextInputFormat.class);
		else {
			usage("not supported compression codec!");
			System.exit(-1);
		}
	}

	job.setMapperClass(mapperClass);

	String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
	if (hfileOutPath != null) {
		HTable table = new HTable(conf, tableName);
		job.setReducerClass(PutSortReducer.class);
		Path outputDir = new Path(hfileOutPath);
		FileOutputFormat.setOutputPath(job, outputDir);
		job.setMapOutputKeyClass(ImmutableBytesWritable.class);
		job.setMapOutputValueClass(Put.class);
		HFileOutputFormat.configureIncrementalLoad(job, table);
	} else {
		// No reducers. Just write straight to table. Call
		// initTableReducerJob
		// to set up the TableOutputFormat.
		TableMapReduceUtil.initTableReducerJob(tableName, null, job);
		job.setNumReduceTasks(0);
	}

	TableMapReduceUtil.addDependencyJars(job);
	TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
			com.google.common.base.Function.class /* Guava used by TsvParser */);
	return job;
}
 
Example 11
Source File: JobFileRawLoader.java    From hraven with Apache License 2.0
/**
 * @param myHBaseConf configuration used to create and run the job. Should be
 *          an HBase configuration.
 * @param input path to the processFile.
 * @param totalJobCount the total number of jobs that need to be run in this
 *          batch. Used in the job name.
 * @return whether all job confs were loaded properly.
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private boolean runRawLoaderJob(Configuration myHBaseConf, String input,
    int totalJobCount)
    throws IOException, InterruptedException, ClassNotFoundException {
  boolean success;

  // Turn off speculative execution.
  // Note: must be BEFORE the job construction with the new mapreduce API.
  myHBaseConf.setBoolean("mapred.map.tasks.speculative.execution", false);

  // Set up job
  Job job = new Job(myHBaseConf, getJobName(totalJobCount));
  job.setJarByClass(JobFileRawLoader.class);

  Path inputPath = new Path(input);

  if (hdfs.exists(inputPath)) {

    // Set input
    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);

    job.setMapperClass(JobFileRawLoaderMapper.class);

    // Set the output format to push data into HBase.
    job.setOutputFormatClass(TableOutputFormat.class);
    TableMapReduceUtil.initTableReducerJob(Constants.HISTORY_RAW_TABLE, null,
        job);

    job.setOutputKeyClass(JobFileRawLoaderMapper.getOutputKeyClass());
    job.setOutputValueClass(JobFileRawLoaderMapper.getOutputValueClass());

    // This is a map-only class, skip reduce step
    job.setNumReduceTasks(0);

    // Run the job
    success = job.waitForCompletion(true);

    if (success) {
      success = hdfs.delete(inputPath, false);
    }

  } else {
    System.err.println("Unable to find processFile: " + inputPath);
    success = false;
  }
  return success;
}
 
Example 12
Source File: HBaseSinkMapReduce.java    From hiped2 with Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {


  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  HBaseWriter.createTableAndColumn(conf, STOCKS_IMPORT_TABLE_NAME,
      HBaseWriter.STOCK_DETAILS_COLUMN_FAMILY_AS_BYTES);

  Job job = new Job(conf);

  job.setJarByClass(HBaseSinkMapReduce.class);

  TableMapReduceUtil.initTableReducerJob(
      STOCKS_IMPORT_TABLE_NAME,
      IdentityTableReducer.class,
      job);

  job.setMapperClass(MapClass.class);

  job.setMapOutputKeyClass(StockPriceWritable.class);
  job.setMapOutputValueClass(Put.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  outputPath.getFileSystem(conf).delete(outputPath, true);

  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}