Java Code Examples for org.apache.hadoop.util.GenericOptionsParser#getRemainingArgs()

The following examples show how to use org.apache.hadoop.util.GenericOptionsParser#getRemainingArgs(). Each example is taken from an open-source project, credited in the Source File line above the code.
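Before the project examples, here is a minimal, self-contained sketch of the pattern they all share. It is illustrative only; the class name ParseArgsDemo is invented for this sketch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class ParseArgsDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // The parser consumes generic Hadoop options (-D key=value, -conf, -fs,
    // -files, -libjars, -archives) and applies them to the Configuration.
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    // getRemainingArgs() returns whatever the parser did not consume --
    // typically the application-specific arguments.
    String[] remaining = parser.getRemainingArgs();
    for (String arg : remaining) {
      System.out.println(arg);
    }
  }
}

Invoked with arguments such as -D my.key=value foo bar, the -D option is applied to conf and the loop prints only foo and bar.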
Example 1
Source File: DistributedGrep.java    From hadoop-map-reduce-patterns with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();
  GenericOptionsParser parser = new GenericOptionsParser(conf, args);
  String[] otherArgs = parser.getRemainingArgs();
  if (otherArgs.length != 3) {
    System.err.println("Usage: DistributedGrep <regex> <in> <out>");
    ToolRunner.printGenericCommandUsage(System.err);
    System.exit(2);
  }
  Job job = new Job(conf, "Distributed Grep");
  job.setJarByClass(DistributedGrep.class);
  job.setMapperClass(GrepMapper.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.getConfiguration().set(REGEX_KEY, otherArgs[0]);
  FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
  boolean success = job.waitForCompletion(true);

  return success ? 0 : 1;
}
 
Example 2
Source File: TezExampleBase.java    From tez with Apache License 2.0
/**
 * Utility method to use the example from within code or a test.
 *
 * @param conf      the Tez configuration instance that will be used to create the DAG and,
 *                  possibly, the Tez client.
 * @param args      arguments to the example
 * @param tezClient an existing running {@link org.apache.tez.client.TezClient} instance, if one
 *                  exists. If no TezClient is specified (null), one will be created based on the
 *                  provided configuration. If a TezClient is specified, the local mode option
 *                  cannot be specified in the arguments; it would have no effect.
 * @return Zero indicates success, non-zero indicates failure
 * @throws Exception
 */
public int run(TezConfiguration conf, String[] args, @Nullable TezClient tezClient) throws
    Exception {
  setConf(conf);
  hadoopShim = new HadoopShimsLoader(conf).getHadoopShim();
  GenericOptionsParser optionParser = new GenericOptionsParser(conf, getExtraOptions(), args);
  if (optionParser.getCommandLine().hasOption(LOCAL_MODE)) {
    isLocalMode = true;
    if (tezClient != null) {
      throw new RuntimeException("can't specify local mode when TezClient is created, it takes no effect");
    }
  }
  if (optionParser.getCommandLine().hasOption(DISABLE_SPLIT_GROUPING)) {
    disableSplitGrouping = true;
  }
  if (optionParser.getCommandLine().hasOption(COUNTER_LOG)) {
    isCountersLog = true;
  }
  if (optionParser.getCommandLine().hasOption(GENERATE_SPLIT_IN_CLIENT)) {
    generateSplitInClient = true;
  }
  String[] otherArgs = optionParser.getRemainingArgs();
  return _execute(otherArgs, conf, tezClient);
}
 
Example 3
Source File: Anonymize.java    From hadoop-map-reduce-patterns with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
	Configuration conf = new Configuration();
	GenericOptionsParser parser = new GenericOptionsParser(conf, args);
	String[] otherArgs = parser.getRemainingArgs();
	if (otherArgs.length != 2) {
		System.err.println("Usage: LastAccessDate <in> <out>");
		ToolRunner.printGenericCommandUsage(System.err);
		System.exit(2);
	}
	Job job = new Job(conf, "LastAccess Date");
	job.setJarByClass(Anonymize.class);
	job.setNumReduceTasks(4);
	job.setMapperClass(AnonymizeMapper.class);
	job.setMapOutputKeyClass(IntWritable.class);
	job.setMapOutputValueClass(Text.class);
	job.setReducerClass(ValueReducer.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(NullWritable.class);
	FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
	FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
	boolean success = job.waitForCompletion(true);
	return success ? 0 : 1;
}
 
Example 4
Source File: BloomFilter.java    From hadoop-map-reduce-patterns with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
	Configuration conf = new Configuration();
	GenericOptionsParser parser = new GenericOptionsParser(conf, args);
	String[] otherArgs = parser.getRemainingArgs();
	if (otherArgs.length != 3) {
		System.err
				.println("Usage: BloomFilter <bloom_filter_file> <in> <out>");
		ToolRunner.printGenericCommandUsage(System.err);
		System.exit(2);
	}

	DistributedCache.addCacheFile(new URI(otherArgs[0]), conf);
	Job job = new Job(conf, "Bloom Filter");
	job.setJarByClass(BloomFilter.class);
	job.setMapperClass(BloomFilterMapper.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(NullWritable.class);
	FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
	FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
	boolean success = job.waitForCompletion(true);

	return success ? 0 : 1;
}
 
Example 5
Source File: DistinctUser.java    From hadoop-map-reduce-patterns with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
	Configuration conf = new Configuration();
	GenericOptionsParser parser = new GenericOptionsParser(conf, args);
	String[] otherArgs = parser.getRemainingArgs();
	if (otherArgs.length != 2) {
		System.err.println("Usage: DistinctUser <in> <out>");
		ToolRunner.printGenericCommandUsage(System.err);
		System.exit(2);
	}
	Job job = new Job(conf, "Distinct User");
	job.setJarByClass(DistinctUser.class);
	job.setMapperClass(DistinctUserMapper.class);
	job.setReducerClass(DistinctUserReducer.class);
	job.setCombinerClass(DistinctUserReducer.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(NullWritable.class);
	FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
	FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
	boolean success = job.waitForCompletion(true);

	return success ? 0 : 1;
}
 
Example 6
Source File: ResourceManager.java    From hadoop with Apache License 2.0
public static void main(String argv[]) {
  Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
  StringUtils.startupShutdownMessage(ResourceManager.class, argv, LOG);
  try {
    Configuration conf = new YarnConfiguration();
    GenericOptionsParser hParser = new GenericOptionsParser(conf, argv);
    argv = hParser.getRemainingArgs();
    // If -format-state-store, then delete RMStateStore; else startup normally
    if (argv.length == 1 && argv[0].equals("-format-state-store")) {
      deleteRMStateStore(conf);
    } else {
      ResourceManager resourceManager = new ResourceManager();
      ShutdownHookManager.get().addShutdownHook(
        new CompositeServiceShutdownHook(resourceManager),
        SHUTDOWN_HOOK_PRIORITY);
      resourceManager.init(conf);
      resourceManager.start();
    }
  } catch (Throwable t) {
    LOG.fatal("Error starting ResourceManager", t);
    System.exit(-1);
  }
}
 
Example 7
Source File: HbaseImporter.java    From Kylin with Apache License 2.0
private static boolean runImport(String[] args, Configuration configuration) throws IOException, InterruptedException, ClassNotFoundException {
    // Make a copy of the configuration to ensure different temp dirs are used.
    GenericOptionsParser opts = new GenericOptionsParser(new Configuration(configuration), args);
    Configuration newConf = opts.getConfiguration();
    args = opts.getRemainingArgs();
    Job job = Import.createSubmittableJob(newConf, args);
    job.waitForCompletion(false);
    return job.isSuccessful();
}
 
Example 8
Source File: TezExampleBase.java    From tez with Apache License 2.0
@Override
public final int run(String[] args) throws Exception {
  Configuration conf = getConf();
  GenericOptionsParser optionParser = new GenericOptionsParser(conf, getExtraOptions(), args);
  String[] otherArgs = optionParser.getRemainingArgs();
  if (optionParser.getCommandLine().hasOption(LOCAL_MODE)) {
    isLocalMode = true;
  }
  if (optionParser.getCommandLine().hasOption(DISABLE_SPLIT_GROUPING)) {
    disableSplitGrouping = true;
  }
  if (optionParser.getCommandLine().hasOption(COUNTER_LOG)) {
    isCountersLog = true;
  }
  if (optionParser.getCommandLine().hasOption(GENERATE_SPLIT_IN_CLIENT)) {
    generateSplitInClient = true;
  }
  if (optionParser.getCommandLine().hasOption(LEAVE_AM_RUNNING)) {
    leaveAmRunning = true;
  }
  if (optionParser.getCommandLine().hasOption(RECONNECT_APP_ID)) {
    reconnectAppId = optionParser.getCommandLine().getOptionValue(RECONNECT_APP_ID);
  }
  hadoopShim = new HadoopShimsLoader(conf).getHadoopShim();

  return _execute(otherArgs, null, null);
}
 
Example 9
Source File: ReduceSideJoin.java    From hadoop-map-reduce-patterns with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
	Configuration conf = new Configuration();
	GenericOptionsParser parser = new GenericOptionsParser(conf, args);
	String[] otherArgs = parser.getRemainingArgs();
	if (otherArgs.length != 4) {
		printUsage();
	}
	Job job = new Job(conf, "ReduceSideJoin");
	job.setJarByClass(ReduceSideJoin.class);

	// Use MultipleInputs to set which input uses what mapper.
	// This keeps the parsing of each data set logically separate.
	// The first two elements of the parsed args are the two inputs.
	MultipleInputs.addInputPath(job, new Path(otherArgs[0]),
			TextInputFormat.class, UserJoinMapper.class);
	MultipleInputs.addInputPath(job, new Path(otherArgs[1]),
			TextInputFormat.class, CommentJoinMapper.class);
	job.getConfiguration().set("join.type", otherArgs[2]);

	job.setReducerClass(UserJoinReducer.class);

	job.setOutputFormatClass(TextOutputFormat.class);
	TextOutputFormat.setOutputPath(job, new Path(otherArgs[3]));

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Text.class);

	return job.waitForCompletion(true) ? 0 : 2;
}
 
Example 10
Source File: StormEvaluatorLocalModeTest.java    From incubator-retired-mrql with Apache License 2.0
@Override
protected Evaluator createEvaluator() throws Exception {
	Configuration conf = null;

	Config.bsp_mode = false;
	Config.spark_mode = false;
	Config.flink_mode = false;
	Config.storm_mode = true;
	Config.map_reduce_mode = false;

	Evaluator.evaluator = new StormEvaluator();

	Config.quiet_execution = true;

	String[] args = new String[] { "-local", "-storm" };

	conf = Evaluator.evaluator.new_configuration();
	GenericOptionsParser gop = new GenericOptionsParser(conf, args);
	conf = gop.getConfiguration();

	args = gop.getRemainingArgs();

	Config.hadoop_mode = true;
	Config.testing = true;
	Config.parse_args(args, conf);
	
	Evaluator.evaluator.init(conf);
	
	return Evaluator.evaluator;
}
 
Example 11
Source File: Content.java    From nutch-htmlunit with Apache License 2.0
public static void main(String argv[]) throws Exception {

    String usage = "Content (-local | -dfs <namenode:port>) recno segment";

    if (argv.length < 3) {
      System.out.println("usage:" + usage);
      return;
    }
    Options opts = new Options();
    Configuration conf = NutchConfiguration.create();
    
    GenericOptionsParser parser =
      new GenericOptionsParser(conf, opts, argv);
    
    String[] remainingArgs = parser.getRemainingArgs();
    FileSystem fs = FileSystem.get(conf);
    
    try {
      int recno = Integer.parseInt(remainingArgs[0]);
      String segment = remainingArgs[1];

      Path file = new Path(segment, DIR_NAME);
      System.out.println("Reading from file: " + file);

      ArrayFile.Reader contents = new ArrayFile.Reader(fs, file.toString(),
          conf);

      Content content = new Content();
      contents.get(recno, content);
      System.out.println("Retrieved " + recno + " from file " + file);

      System.out.println(content);

      contents.close();
    } finally {
      fs.close();
    }
  }
 
Example 12
Source File: SimpleRandomSampling.java    From hadoop-map-reduce-patterns with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
	Configuration conf = new Configuration();
	GenericOptionsParser parser = new GenericOptionsParser(conf, args);
	String[] otherArgs = parser.getRemainingArgs();
	if (otherArgs.length != 3) {
		printUsage();
	}
	Float filterPercentage = 0.0f;
	try {
		filterPercentage = Float.parseFloat(otherArgs[0]) / 100.0f;
	} catch (NumberFormatException nfe) {
		printUsage();
	}

	Job job = new Job(conf, "Simple Random Sampling");
	job.setJarByClass(SimpleRandomSampling.class);
	job.setMapperClass(SimpleRandomSamplingMapper.class);
	job.setOutputKeyClass(NullWritable.class);
	job.setOutputValueClass(Text.class);
	job.setNumReduceTasks(1); // prevent lots of small files
	job.getConfiguration()
			.setFloat(FILTER_PERCENTAGE_KEY, filterPercentage);
	FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
	FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
	boolean success = job.waitForCompletion(true);

	return success ? 0 : 1;
}
 
Example 13
Source File: BinningTags.java    From hadoop-map-reduce-patterns with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
	Configuration conf = new Configuration();
	GenericOptionsParser parser = new GenericOptionsParser(conf, args);
	String[] otherArgs = parser.getRemainingArgs();
	if (otherArgs.length != 2) {
		System.err.println("Usage: BinningTags <in> <out>");
		ToolRunner.printGenericCommandUsage(System.err);
		System.exit(2);
	}
	Job job = new Job(conf, "Binning Tags");
	job.setJarByClass(BinningTags.class);
	// Configure the MultipleOutputs by adding an output called "bins"
	// With the proper output format and mapper key/value pairs
	MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class,
			Text.class, NullWritable.class);
	// Enable the counters for the job
	// If there are a significant number of different named outputs, this
	// should be disabled
	MultipleOutputs.setCountersEnabled(job, true);
	// Map-only job
	job.setNumReduceTasks(0);
	job.setMapperClass(BinningMapper.class);
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(NullWritable.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(NullWritable.class);
	FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
	FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
	boolean success = job.waitForCompletion(true);
	return success ? 0 : 1;
}
 
Example 14
Source File: DirectBigQueryWordCount.java    From hadoop-connectors with Apache License 2.0
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {

  // GenericOptionsParser is a utility to parse command-line arguments generic to the Hadoop
  // framework. This example won't cover the specifics, but the parser recognizes several
  // standard command-line arguments, enabling applications to easily specify a NameNode, a
  // ResourceManager, additional configuration resources, etc.
  GenericOptionsParser parser = new GenericOptionsParser(args);
  args = parser.getRemainingArgs();

  // Make sure we have the right parameters.
  if (args.length != 3) {
    System.out.println(
        "Usage: hadoop jar bigquery_wordcount.jar [ProjectId] [QualifiedInputTableId] "
            + "[GcsOutputPath]\n"
            + "    ProjectId - Project under which to issue the BigQuery operations. Also "
            + "serves as the default project for table IDs which don't explicitly specify a "
            + "project for the table.\n"
            + "    QualifiedInputTableId - Input table ID of the form "
            + "(Optional ProjectId):[DatasetId].[TableId]\n"
            + "    OutputPath - The output path to write data, e.g. "
            + "gs://bucket/dir/");
    System.exit(1);
  }

  // Get the individual parameters from the command line.
  String projectId = args[0];
  String inputQualifiedTableId = args[1];
  String outputPath = args[2];

  // Create the job and get its configuration.
  Job job = new Job(parser.getConfiguration(), "wordcount");
  Configuration conf = job.getConfiguration();

  // Set the job-level projectId.
  conf.set(PROJECT_ID.getKey(), projectId);

  // Configure input and output.
  BigQueryConfiguration.configureBigQueryInput(conf, inputQualifiedTableId);

  // Set column and predicate filters
  conf.set(SELECTED_FIELDS.getKey(), "word,word_count");
  conf.set(SQL_FILTER.getKey(), "word >= 'A' AND word <= 'zzz'");
  conf.set(MRJobConfig.NUM_MAPS, "999");

  // This helps Hadoop identify the Jar which contains the mapper and reducer by specifying a
  // class in that Jar. This is required if the jar is being passed on the command line to Hadoop.
  job.setJarByClass(DirectBigQueryWordCount.class);

  // Tell the job what the output will be.
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setInputFormatClass(DirectBigQueryInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  job.waitForCompletion(true);
}
 
Example 15
Source File: BSPEvaluatorLocalModeTest.java    From incubator-retired-mrql with Apache License 2.0
@Override
protected Evaluator createEvaluator() throws Exception {
	Configuration conf = null;

	Config.bsp_mode = true;
	Config.spark_mode = false;
	Config.map_reduce_mode = false;

	Evaluator.evaluator = new BSPEvaluator();

	Config.quiet_execution = true;

	String[] args = new String[] { "-local", "-bsp" };

	conf = Evaluator.evaluator.new_configuration();
	GenericOptionsParser gop = new GenericOptionsParser(conf, args);
	conf = gop.getConfiguration();

	args = gop.getRemainingArgs();
	
	Config.hadoop_mode = true;
	Config.testing = true;
	Config.parse_args(args, conf);
	
	Evaluator.evaluator.init(conf);
	
	return Evaluator.evaluator;
}
 
Example 16
Source File: BSPQueryLocalModeTest.java    From incubator-retired-mrql with Apache License 2.0
@Override
protected Evaluator createEvaluator() throws Exception {
	Configuration conf = new Configuration();

	Config.bsp_mode = true;
	Config.spark_mode = false;
	Config.map_reduce_mode = false;

	Evaluator.evaluator = new BSPEvaluator();

	Config.quiet_execution = true;

	String[] args = new String[] { "-local", "-bsp" };

	conf = Evaluator.evaluator.new_configuration();
	GenericOptionsParser gop = new GenericOptionsParser(conf, args);
	conf = gop.getConfiguration();

	args = gop.getRemainingArgs();

	Config.hadoop_mode = true;
	Config.testing = true;
	Config.parse_args(args, conf);
	
	Evaluator.evaluator.init(conf);
	
	return Evaluator.evaluator;
}
 
Example 17
Source File: MapReduceQueryLocalModeTest.java    From incubator-retired-mrql with Apache License 2.0
@Override
protected Evaluator createEvaluator() throws Exception {
	Configuration conf = new Configuration();

	Config.bsp_mode = false;
	Config.spark_mode = false;
	Config.map_reduce_mode = true;

	Evaluator.evaluator = new MapReduceEvaluator();

	Config.quiet_execution = true;

	String[] args = new String[] { "-local" };

	conf = Evaluator.evaluator.new_configuration();
	GenericOptionsParser gop = new GenericOptionsParser(conf, args);
	conf = gop.getConfiguration();

	args = gop.getRemainingArgs();

	Config.hadoop_mode = true;
	Config.testing = true;
	Config.parse_args(args, conf);
	
	Evaluator.evaluator.init(conf);
	
	return Evaluator.evaluator;
}
 
Example 18
Source File: MapReduceEvaluatorLocalModeTest.java    From incubator-retired-mrql with Apache License 2.0
@Override
protected Evaluator createEvaluator() throws Exception {
	Configuration conf = null;

	Config.bsp_mode = false;
	Config.spark_mode = false;
	Config.map_reduce_mode = true;

	Evaluator.evaluator = new MapReduceEvaluator();

	Config.quiet_execution = true;

	String[] args = new String[] { "-local" };

	conf = Evaluator.evaluator.new_configuration();
	GenericOptionsParser gop = new GenericOptionsParser(conf, args);
	conf = gop.getConfiguration();

	args = gop.getRemainingArgs();

	Config.hadoop_mode = true;
	Config.testing = true;
	Config.parse_args(args, conf);
	
	Evaluator.evaluator.init(conf);

	return Evaluator.evaluator;
}
 
Example 19
Source File: MyWordCount.java    From BigDataArchitect with Apache License 2.0
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration(true);

        GenericOptionsParser parser = new GenericOptionsParser(conf, args);  // utility that sets -D (and similar) properties directly into conf, leaving the remaining command options
        String[] othargs = parser.getRemainingArgs();

        // Tell the framework the job is submitted from a Windows (heterogeneous) platform
        conf.set("mapreduce.app-submission.cross-platform","true");

//        conf.set("mapreduce.framework.name","local");
//        System.out.println(conf.get("mapreduce.framework.name"));

        Job job = Job.getInstance(conf);


//        FileInputFormat.setMinInputSplitSize(job,2222);
//        job.setInputFormatClass(ooxx.class);

        job.setJar("C:\\Users\\admin\\IdeaProjects\\msbhadoop\\target\\hadoop-hdfs-1.0-0.1.jar");
        // absolutely required
        job.setJarByClass(MyWordCount.class);

        job.setJobName("mashibing");

        Path infile = new Path(othargs[0]);
        TextInputFormat.addInputPath(job, infile);

        Path outfile = new Path(othargs[1]);
        if (outfile.getFileSystem(conf).exists(outfile)) outfile.getFileSystem(conf).delete(outfile, true);
        TextOutputFormat.setOutputPath(job, outfile);

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setReducerClass(MyReducer.class);

//        job.setNumReduceTasks(2);
        // Submit the job, then poll for progress until the job is complete
        job.waitForCompletion(true);

    }
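For example, given a command line such as hadoop jar wc.jar MyWordCount -D mapreduce.job.reduces=2 /input /output (jar name and paths invented for illustration), the parser applies the -D property to conf, and getRemainingArgs() returns only /input and /output, which is why the paths are read from othargs[0] and othargs[1].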