Java Code Examples for org.apache.hadoop.util.GenericOptionsParser#getRemainingArgs()
The following examples show how to use org.apache.hadoop.util.GenericOptionsParser#getRemainingArgs().
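Before the examples, a quick orientation: GenericOptionsParser consumes the command-line arguments that are generic to the Hadoop framework (-conf, -D, -fs, -jt, -files, -libjars, -archives), applies them to the supplied Configuration, and getRemainingArgs() returns whatever is left over for the application itself. Here is a minimal, self-contained sketch of that split (the class name and printed text are ours, for illustration only):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class RemainingArgsSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Generic options such as -D key=value are consumed here and applied to conf.
        GenericOptionsParser parser = new GenericOptionsParser(conf, args);
        // Only the application-specific arguments survive the parse.
        for (String arg : parser.getRemainingArgs()) {
            System.out.println("application argument: " + arg);
        }
    }
}

Run as "hadoop jar app.jar RemainingArgsSketch -D mapreduce.job.reduces=2 /in /out", the -D pair lands in conf and the loop prints only /in and /out.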
Example 1
Source File: DistributedGrep.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: DistributedGrep <regex> <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    Job job = new Job(conf, "Distributed Grep");
    job.setJarByClass(DistributedGrep.class);
    job.setMapperClass(GrepMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.getConfiguration().set(REGEX_KEY, otherArgs[0]);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Example 2
Source File: TezExampleBase.java From tez with Apache License 2.0

/**
 * Utility method to use the example from within code or a test.
 *
 * @param conf      the Tez configuration instance which will be used to create the DAG and,
 *                  possibly, the TezClient
 * @param args      arguments to the example
 * @param tezClient an existing running {@link org.apache.tez.client.TezClient} instance, if one
 *                  exists. If no TezClient is specified (null), one will be created based on the
 *                  provided configuration. If a TezClient is specified, the local mode option
 *                  cannot be specified in the arguments; it would take no effect.
 * @return zero indicates success, non-zero indicates failure
 * @throws Exception
 */
public int run(TezConfiguration conf, String[] args, @Nullable TezClient tezClient) throws Exception {
    setConf(conf);
    hadoopShim = new HadoopShimsLoader(conf).getHadoopShim();
    GenericOptionsParser optionParser = new GenericOptionsParser(conf, getExtraOptions(), args);
    if (optionParser.getCommandLine().hasOption(LOCAL_MODE)) {
        isLocalMode = true;
        if (tezClient != null) {
            throw new RuntimeException(
                "can't specify local mode when TezClient is created, it takes no effect");
        }
    }
    if (optionParser.getCommandLine().hasOption(DISABLE_SPLIT_GROUPING)) {
        disableSplitGrouping = true;
    }
    if (optionParser.getCommandLine().hasOption(COUNTER_LOG)) {
        isCountersLog = true;
    }
    if (optionParser.getCommandLine().hasOption(GENERATE_SPLIT_IN_CLIENT)) {
        generateSplitInClient = true;
    }
    String[] otherArgs = optionParser.getRemainingArgs();
    return _execute(otherArgs, conf, tezClient);
}
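The three-argument constructor used above merges the application's own Options (here, from getExtraOptions()) into the parse: custom flags become visible through getCommandLine(), while getRemainingArgs() returns what is left after both the generic and the custom options are consumed. A standalone sketch of that pattern, with a made-up flag name for illustration:

import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class ExtraOptionsSketch {
    public static void main(String[] args) throws Exception {
        // An application-specific flag, merged with the generic Hadoop options.
        Options extra = new Options();
        extra.addOption("local_mode", false, "run in local mode");

        Configuration conf = new Configuration();
        GenericOptionsParser parser = new GenericOptionsParser(conf, extra, args);

        // Custom flags are queried through the underlying CommandLine...
        boolean localMode = parser.getCommandLine().hasOption("local_mode");

        // ...while getRemainingArgs() holds only the positional arguments.
        String[] remaining = parser.getRemainingArgs();
        System.out.println("local mode: " + localMode + ", remaining args: " + remaining.length);
    }
}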
Example 3
Source File: Anonymize.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: LastAccessDate <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    Job job = new Job(conf, "LastAccess Date");
    job.setJarByClass(Anonymize.class);
    job.setNumReduceTasks(4);
    job.setMapperClass(AnonymizeMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(ValueReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Example 4
Source File: BloomFilter.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: BloomFilter <bloom_filter_file> <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    DistributedCache.addCacheFile(new URI(otherArgs[0]), conf);

    Job job = new Job(conf, "Bloom Filter");
    job.setJarByClass(BloomFilter.class);
    job.setMapperClass(BloomFilterMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Example 5
Source File: DistinctUser.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: DistinctUser <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    Job job = new Job(conf, "Distinct User");
    job.setJarByClass(DistinctUser.class);
    job.setMapperClass(DistinctUserMapper.class);
    job.setReducerClass(DistinctUserReducer.class);
    job.setCombinerClass(DistinctUserReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Example 6
Source File: ResourceManager.java From hadoop with Apache License 2.0

public static void main(String argv[]) {
    Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
    StringUtils.startupShutdownMessage(ResourceManager.class, argv, LOG);
    try {
        Configuration conf = new YarnConfiguration();
        GenericOptionsParser hParser = new GenericOptionsParser(conf, argv);
        argv = hParser.getRemainingArgs();
        // If -format-state-store, then delete RMStateStore; else start up normally
        if (argv.length == 1 && argv[0].equals("-format-state-store")) {
            deleteRMStateStore(conf);
        } else {
            ResourceManager resourceManager = new ResourceManager();
            ShutdownHookManager.get().addShutdownHook(
                new CompositeServiceShutdownHook(resourceManager),
                SHUTDOWN_HOOK_PRIORITY);
            resourceManager.init(conf);
            resourceManager.start();
        }
    } catch (Throwable t) {
        LOG.fatal("Error starting ResourceManager", t);
        System.exit(-1);
    }
}
Example 7
Source File: HbaseImporter.java From Kylin with Apache License 2.0

private static boolean runImport(String[] args, Configuration configuration)
        throws IOException, InterruptedException, ClassNotFoundException {
    // Make a copy of the configuration to make sure different temp dirs are used.
    GenericOptionsParser opts = new GenericOptionsParser(new Configuration(configuration), args);
    Configuration newConf = opts.getConfiguration();
    args = opts.getRemainingArgs();
    Job job = Import.createSubmittableJob(newConf, args);
    job.waitForCompletion(false);
    return job.isSuccessful();
}
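The defensive copy in this example matters because GenericOptionsParser mutates the Configuration it is handed, writing -D and -conf values into it; parsing a copy leaves the caller's configuration untouched. A minimal sketch of the same copy-then-parse pattern (class and method names are ours, for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class CopyThenParseSketch {

    // Returns a copy of the original Configuration with the generic options applied.
    static Configuration parseIntoCopy(Configuration original, String[] args) throws Exception {
        GenericOptionsParser opts = new GenericOptionsParser(new Configuration(original), args);
        return opts.getConfiguration();
    }

    public static void main(String[] args) throws Exception {
        Configuration original = new Configuration();
        Configuration tuned = parseIntoCopy(original, new String[] { "-D", "mapreduce.job.reduces=2" });
        // The copy carries the override; the original is unchanged.
        System.out.println("copy:     " + tuned.get("mapreduce.job.reduces"));
        System.out.println("original: " + original.get("mapreduce.job.reduces"));
    }
}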
Example 8
Source File: TezExampleBase.java From tez with Apache License 2.0

@Override
public final int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser optionParser = new GenericOptionsParser(conf, getExtraOptions(), args);
    String[] otherArgs = optionParser.getRemainingArgs();
    if (optionParser.getCommandLine().hasOption(LOCAL_MODE)) {
        isLocalMode = true;
    }
    if (optionParser.getCommandLine().hasOption(DISABLE_SPLIT_GROUPING)) {
        disableSplitGrouping = true;
    }
    if (optionParser.getCommandLine().hasOption(COUNTER_LOG)) {
        isCountersLog = true;
    }
    if (optionParser.getCommandLine().hasOption(GENERATE_SPLIT_IN_CLIENT)) {
        generateSplitInClient = true;
    }
    if (optionParser.getCommandLine().hasOption(LEAVE_AM_RUNNING)) {
        leaveAmRunning = true;
    }
    if (optionParser.getCommandLine().hasOption(RECONNECT_APP_ID)) {
        reconnectAppId = optionParser.getCommandLine().getOptionValue(RECONNECT_APP_ID);
    }
    hadoopShim = new HadoopShimsLoader(conf).getHadoopShim();
    return _execute(otherArgs, null, null);
}
Example 9
Source File: ReduceSideJoin.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 4) {
        printUsage();
    }
    Job job = new Job(conf, "ReduceSideJoin");
    job.setJarByClass(ReduceSideJoin.class);
    // Use MultipleInputs to set which input uses what mapper.
    // This will keep parsing of each data set separate from a logical standpoint.
    // The first two elements of the args array are the two inputs.
    MultipleInputs.addInputPath(job, new Path(args[0]),
        TextInputFormat.class, UserJoinMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]),
        TextInputFormat.class, CommentJoinMapper.class);
    job.getConfiguration().set("join.type", args[2]);
    job.setReducerClass(UserJoinReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(args[3]));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    return job.waitForCompletion(true) ? 0 : 2;
}
Example 10
Source File: StormEvaluatorLocalModeTest.java From incubator-retired-mrql with Apache License 2.0

@Override
protected Evaluator createEvaluator() throws Exception {
    Configuration conf = null;
    Config.bsp_mode = false;
    Config.spark_mode = false;
    Config.flink_mode = false;
    Config.storm_mode = true;
    Config.map_reduce_mode = false;
    Evaluator.evaluator = new StormEvaluator();
    Config.quiet_execution = true;
    String[] args = new String[] { "-local", "-storm" };
    conf = Evaluator.evaluator.new_configuration();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    args = gop.getRemainingArgs();
    Config.hadoop_mode = true;
    Config.testing = true;
    Config.parse_args(args, conf);
    Evaluator.evaluator.init(conf);
    return Evaluator.evaluator;
}
Example 11
Source File: Content.java From nutch-htmlunit with Apache License 2.0

public static void main(String argv[]) throws Exception {
    String usage = "Content (-local | -dfs <namenode:port>) recno segment";
    if (argv.length < 3) {
        System.out.println("usage:" + usage);
        return;
    }
    Options opts = new Options();
    Configuration conf = NutchConfiguration.create();
    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
    String[] remainingArgs = parser.getRemainingArgs();
    FileSystem fs = FileSystem.get(conf);
    try {
        int recno = Integer.parseInt(remainingArgs[0]);
        String segment = remainingArgs[1];
        Path file = new Path(segment, DIR_NAME);
        System.out.println("Reading from file: " + file);
        ArrayFile.Reader contents = new ArrayFile.Reader(fs, file.toString(), conf);
        Content content = new Content();
        contents.get(recno, content);
        System.out.println("Retrieved " + recno + " from file " + file);
        System.out.println(content);
        contents.close();
    } finally {
        fs.close();
    }
}
Example 12

Source File: Content.java From anthelion with Apache License 2.0

The main() in this file is identical, line for line, to Example 11 above, so the code is not repeated here.
Example 13
Source File: SimpleRandomSampling.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        printUsage();
    }
    Float filterPercentage = 0.0f;
    try {
        filterPercentage = Float.parseFloat(otherArgs[0]) / 100.0f;
    } catch (NumberFormatException nfe) {
        printUsage();
    }

    Job job = new Job(conf, "Simple Random Sampling");
    job.setJarByClass(SimpleRandomSampling.class);
    job.setMapperClass(SimpleRandomSamplingMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1); // prevent lots of small files
    job.getConfiguration().setFloat(FILTER_PERCENTAGE_KEY, filterPercentage);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Example 14
Source File: BinningTags.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: BinningTags <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }
    Job job = new Job(conf, "Binning Tags");
    job.setJarByClass(BinningTags.class);
    // Configure the MultipleOutputs by adding an output called "bins"
    // with the proper output format and mapper key/value pairs.
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class,
        Text.class, NullWritable.class);
    // Enable the counters for the job.
    // If there are a significant number of different named outputs, this
    // should be disabled.
    MultipleOutputs.setCountersEnabled(job, true);
    // Map-only job
    job.setNumReduceTasks(0);
    job.setMapperClass(BinningMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Example 15
Source File: DirectBigQueryWordCount.java From hadoop-connectors with Apache License 2.0

public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException {

    // GenericOptionsParser is a utility to parse command line arguments generic to the Hadoop
    // framework. This example won't cover the specifics, but will recognize several standard
    // command line arguments, enabling applications to easily specify a namenode, a
    // ResourceManager, additional configuration resources, etc.
    GenericOptionsParser parser = new GenericOptionsParser(args);
    args = parser.getRemainingArgs();

    // Make sure we have the right parameters.
    if (args.length != 3) {
        System.out.println(
            "Usage: hadoop jar bigquery_wordcount.jar [ProjectId] [QualifiedInputTableId] "
                + "[GcsOutputPath]\n"
                + "    ProjectId - Project under which to issue the BigQuery operations. Also "
                + "serves as the default project for table IDs which don't explicitly specify a "
                + "project for the table.\n"
                + "    QualifiedInputTableId - Input table ID of the form "
                + "(Optional ProjectId):[DatasetId].[TableId]\n"
                + "    OutputPath - The output path to write data, e.g. "
                + "gs://bucket/dir/");
        System.exit(1);
    }

    // Get the individual parameters from the command line.
    String projectId = args[0];
    String inputQualifiedTableId = args[1];
    String outputPath = args[2];

    // Create the job and get its configuration.
    Job job = new Job(parser.getConfiguration(), "wordcount");
    Configuration conf = job.getConfiguration();

    // Set the job-level projectId.
    conf.set(PROJECT_ID.getKey(), projectId);

    // Configure input and output.
    BigQueryConfiguration.configureBigQueryInput(conf, inputQualifiedTableId);

    // Set column and predicate filters.
    conf.set(SELECTED_FIELDS.getKey(), "word,word_count");
    conf.set(SQL_FILTER.getKey(), "word >= 'A' AND word <= 'zzz'");
    conf.set(MRJobConfig.NUM_MAPS, "999");

    // This helps Hadoop identify the jar which contains the mapper and reducer by specifying
    // a class in that jar. This is required if the jar is being passed on the command line to Hadoop.
    job.setJarByClass(DirectBigQueryWordCount.class);

    // Tell the job what the output will be.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(DirectBigQueryInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.waitForCompletion(true);
}
Example 16
Source File: BSPEvaluatorLocalModeTest.java From incubator-retired-mrql with Apache License 2.0

@Override
protected Evaluator createEvaluator() throws Exception {
    Configuration conf = null;
    Config.bsp_mode = true;
    Config.spark_mode = false;
    Config.map_reduce_mode = false;
    Evaluator.evaluator = new BSPEvaluator();
    Config.quiet_execution = true;
    String[] args = new String[] { "-local", "-bsp" };
    conf = Evaluator.evaluator.new_configuration();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    args = gop.getRemainingArgs();
    Config.hadoop_mode = true;
    Config.testing = true;
    Config.parse_args(args, conf);
    Evaluator.evaluator.init(conf);
    return Evaluator.evaluator;
}
Example 17
Source File: BSPQueryLocalModeTest.java From incubator-retired-mrql with Apache License 2.0

@Override
protected Evaluator createEvaluator() throws Exception {
    Configuration conf = new Configuration();
    Config.bsp_mode = true;
    Config.spark_mode = false;
    Config.map_reduce_mode = false;
    Evaluator.evaluator = new BSPEvaluator();
    Config.quiet_execution = true;
    String[] args = new String[] { "-local", "-bsp" };
    conf = Evaluator.evaluator.new_configuration();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    args = gop.getRemainingArgs();
    Config.hadoop_mode = true;
    Config.testing = true;
    Config.parse_args(args, conf);
    Evaluator.evaluator.init(conf);
    return Evaluator.evaluator;
}
Example 18
Source File: MapReduceQueryLocalModeTest.java From incubator-retired-mrql with Apache License 2.0

@Override
protected Evaluator createEvaluator() throws Exception {
    Configuration conf = new Configuration();
    Config.bsp_mode = false;
    Config.spark_mode = false;
    Config.map_reduce_mode = true;
    Evaluator.evaluator = new MapReduceEvaluator();
    Config.quiet_execution = true;
    String[] args = new String[] { "-local" };
    conf = Evaluator.evaluator.new_configuration();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    args = gop.getRemainingArgs();
    Config.hadoop_mode = true;
    Config.testing = true;
    Config.parse_args(args, conf);
    Evaluator.evaluator.init(conf);
    return Evaluator.evaluator;
}
Example 19
Source File: MapReduceEvaluatorLocalModeTest.java From incubator-retired-mrql with Apache License 2.0

@Override
protected Evaluator createEvaluator() throws Exception {
    Configuration conf = null;
    Config.bsp_mode = false;
    Config.spark_mode = false;
    Config.map_reduce_mode = true;
    Evaluator.evaluator = new MapReduceEvaluator();
    Config.quiet_execution = true;
    String[] args = new String[] { "-local" };
    conf = Evaluator.evaluator.new_configuration();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    args = gop.getRemainingArgs();
    Config.hadoop_mode = true;
    Config.testing = true;
    Config.parse_args(args, conf);
    Evaluator.evaluator.init(conf);
    return Evaluator.evaluator;
}
Example 20
Source File: MyWordCount.java From BigDataArchitect with Apache License 2.0

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration(true);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    // The parser sets -D and other generic properties directly on conf,
    // leaving only the application's own command options.
    String[] othargs = parser.getRemainingArgs();

    // Let the framework know the job is submitted from a heterogeneous (Windows) client.
    conf.set("mapreduce.app-submission.cross-platform", "true");
    // conf.set("mapreduce.framework.name", "local");
    // System.out.println(conf.get("mapreduce.framework.name"));

    Job job = Job.getInstance(conf);
    // FileInputFormat.setMinInputSplitSize(job, 2222);
    // job.setInputFormatClass(ooxx.class);
    job.setJar("C:\\Users\\admin\\IdeaProjects\\msbhadoop\\target\\hadoop-hdfs-1.0-0.1.jar");
    // Required:
    job.setJarByClass(MyWordCount.class);
    job.setJobName("mashibing");

    Path infile = new Path(othargs[0]);
    TextInputFormat.addInputPath(job, infile);
    Path outfile = new Path(othargs[1]);
    if (outfile.getFileSystem(conf).exists(outfile))
        outfile.getFileSystem(conf).delete(outfile, true);
    TextOutputFormat.setOutputPath(job, outfile);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(MyReducer.class);
    // job.setNumReduceTasks(2);

    // Submit the job, then poll for progress until the job is complete.
    job.waitForCompletion(true);
}