Java Code Examples for org.apache.hadoop.util.GenericOptionsParser#getRemainingArgs()
The following examples show how to use org.apache.hadoop.util.GenericOptionsParser#getRemainingArgs().
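Before the examples, a quick orientation: GenericOptionsParser consumes the command-line arguments that are generic to the Hadoop framework (-conf, -D, -fs, -jt, -files, -libjars, -archives), applies them to the supplied Configuration, and getRemainingArgs() returns whatever is left over for the application itself. Here is a minimal, self-contained sketch of that split (the class name and printed text are ours, for illustration only):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class RemainingArgsSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Generic options such as -D key=value are consumed here and applied to conf.
        GenericOptionsParser parser = new GenericOptionsParser(conf, args);
        // Only the application-specific arguments survive the parse.
        for (String arg : parser.getRemainingArgs()) {
            System.out.println("application argument: " + arg);
        }
    }
}

Run as "hadoop jar app.jar RemainingArgsSketch -D mapreduce.job.reduces=2 /in /out", the -D pair lands in conf and the loop prints only /in and /out.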
Example 1
Source File: DistributedGrep.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: DistributedGrep <regex> <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    Job job = new Job(conf, "Distributed Grep");
    job.setJarByClass(DistributedGrep.class);
    job.setMapperClass(GrepMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.getConfiguration().set(REGEX_KEY, otherArgs[0]);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Example 2
Source File: TezExampleBase.java From tez with Apache License 2.0

/**
 * Utility method to use the example from within code or a test.
 *
 * @param conf      the Tez configuration instance which will be used to create the DAG and,
 *                  possibly, the TezClient
 * @param args      arguments to the example
 * @param tezClient an existing running {@link org.apache.tez.client.TezClient} instance, if one
 *                  exists. If no TezClient is specified (null), one will be created based on the
 *                  provided configuration. If a TezClient is specified, the local mode option
 *                  cannot be specified in the arguments; it would take no effect.
 * @return zero indicates success, non-zero indicates failure
 * @throws Exception
 */
public int run(TezConfiguration conf, String[] args, @Nullable TezClient tezClient) throws Exception {
    setConf(conf);
    hadoopShim = new HadoopShimsLoader(conf).getHadoopShim();
    GenericOptionsParser optionParser = new GenericOptionsParser(conf, getExtraOptions(), args);
    if (optionParser.getCommandLine().hasOption(LOCAL_MODE)) {
        isLocalMode = true;
        if (tezClient != null) {
            throw new RuntimeException(
                "can't specify local mode when TezClient is created, it takes no effect");
        }
    }
    if (optionParser.getCommandLine().hasOption(DISABLE_SPLIT_GROUPING)) {
        disableSplitGrouping = true;
    }
    if (optionParser.getCommandLine().hasOption(COUNTER_LOG)) {
        isCountersLog = true;
    }
    if (optionParser.getCommandLine().hasOption(GENERATE_SPLIT_IN_CLIENT)) {
        generateSplitInClient = true;
    }
    String[] otherArgs = optionParser.getRemainingArgs();
    return _execute(otherArgs, conf, tezClient);
}
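The three-argument constructor used above merges the application's own Options (here, from getExtraOptions()) into the parse: custom flags become visible through getCommandLine(), while getRemainingArgs() returns what is left after both the generic and the custom options are consumed. A standalone sketch of that pattern, with a made-up flag name for illustration:

import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class ExtraOptionsSketch {
    public static void main(String[] args) throws Exception {
        // An application-specific flag, merged with the generic Hadoop options.
        Options extra = new Options();
        extra.addOption("local_mode", false, "run in local mode");

        Configuration conf = new Configuration();
        GenericOptionsParser parser = new GenericOptionsParser(conf, extra, args);

        // Custom flags are queried through the underlying CommandLine...
        boolean localMode = parser.getCommandLine().hasOption("local_mode");

        // ...while getRemainingArgs() holds only the positional arguments.
        String[] remaining = parser.getRemainingArgs();
        System.out.println("local mode: " + localMode + ", remaining args: " + remaining.length);
    }
}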
Example 3
Source File: Anonymize.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: LastAccessDate <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    Job job = new Job(conf, "LastAccess Date");
    job.setJarByClass(Anonymize.class);
    job.setNumReduceTasks(4);
    job.setMapperClass(AnonymizeMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(ValueReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Example 4
Source File: BloomFilter.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: BloomFilter <bloom_filter_file> <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    DistributedCache.addCacheFile(new URI(otherArgs[0]), conf);

    Job job = new Job(conf, "Bloom Filter");
    job.setJarByClass(BloomFilter.class);
    job.setMapperClass(BloomFilterMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Example 5
Source File: DistinctUser.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: DistinctUser <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    Job job = new Job(conf, "Distinct User");
    job.setJarByClass(DistinctUser.class);
    job.setMapperClass(DistinctUserMapper.class);
    job.setReducerClass(DistinctUserReducer.class);
    job.setCombinerClass(DistinctUserReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Example 6
Source File: ResourceManager.java From hadoop with Apache License 2.0

public static void main(String argv[]) {
    Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
    StringUtils.startupShutdownMessage(ResourceManager.class, argv, LOG);
    try {
        Configuration conf = new YarnConfiguration();
        GenericOptionsParser hParser = new GenericOptionsParser(conf, argv);
        argv = hParser.getRemainingArgs();
        // If -format-state-store, then delete RMStateStore; else start up normally
        if (argv.length == 1 && argv[0].equals("-format-state-store")) {
            deleteRMStateStore(conf);
        } else {
            ResourceManager resourceManager = new ResourceManager();
            ShutdownHookManager.get().addShutdownHook(
                new CompositeServiceShutdownHook(resourceManager),
                SHUTDOWN_HOOK_PRIORITY);
            resourceManager.init(conf);
            resourceManager.start();
        }
    } catch (Throwable t) {
        LOG.fatal("Error starting ResourceManager", t);
        System.exit(-1);
    }
}
Example 7
Source File: HbaseImporter.java From Kylin with Apache License 2.0

private static boolean runImport(String[] args, Configuration configuration)
        throws IOException, InterruptedException, ClassNotFoundException {
    // Make a copy of the configuration to make sure different temp dirs are used.
    GenericOptionsParser opts = new GenericOptionsParser(new Configuration(configuration), args);
    Configuration newConf = opts.getConfiguration();
    args = opts.getRemainingArgs();
    Job job = Import.createSubmittableJob(newConf, args);
    job.waitForCompletion(false);
    return job.isSuccessful();
}
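The defensive copy in this example matters because GenericOptionsParser mutates the Configuration it is handed, writing -D and -conf values into it; parsing a copy leaves the caller's configuration untouched. A minimal sketch of the same copy-then-parse pattern (class and method names are ours, for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class CopyThenParseSketch {

    // Returns a copy of the original Configuration with the generic options applied.
    static Configuration parseIntoCopy(Configuration original, String[] args) throws Exception {
        GenericOptionsParser opts = new GenericOptionsParser(new Configuration(original), args);
        return opts.getConfiguration();
    }

    public static void main(String[] args) throws Exception {
        Configuration original = new Configuration();
        Configuration tuned = parseIntoCopy(original, new String[] { "-D", "mapreduce.job.reduces=2" });
        // The copy carries the override; the original is unchanged.
        System.out.println("copy:     " + tuned.get("mapreduce.job.reduces"));
        System.out.println("original: " + original.get("mapreduce.job.reduces"));
    }
}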
Example 8
Source File: TezExampleBase.java From tez with Apache License 2.0

@Override
public final int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser optionParser = new GenericOptionsParser(conf, getExtraOptions(), args);
    String[] otherArgs = optionParser.getRemainingArgs();
    if (optionParser.getCommandLine().hasOption(LOCAL_MODE)) {
        isLocalMode = true;
    }
    if (optionParser.getCommandLine().hasOption(DISABLE_SPLIT_GROUPING)) {
        disableSplitGrouping = true;
    }
    if (optionParser.getCommandLine().hasOption(COUNTER_LOG)) {
        isCountersLog = true;
    }
    if (optionParser.getCommandLine().hasOption(GENERATE_SPLIT_IN_CLIENT)) {
        generateSplitInClient = true;
    }
    if (optionParser.getCommandLine().hasOption(LEAVE_AM_RUNNING)) {
        leaveAmRunning = true;
    }
    if (optionParser.getCommandLine().hasOption(RECONNECT_APP_ID)) {
        reconnectAppId = optionParser.getCommandLine().getOptionValue(RECONNECT_APP_ID);
    }
    hadoopShim = new HadoopShimsLoader(conf).getHadoopShim();
    return _execute(otherArgs, null, null);
}
Example 9
Source File: ReduceSideJoin.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 4) {
        printUsage();
    }
    Job job = new Job(conf, "ReduceSideJoin");
    job.setJarByClass(ReduceSideJoin.class);
    // Use MultipleInputs to set which input uses what mapper.
    // This will keep parsing of each data set separate from a logical standpoint.
    // The first two elements of the args array are the two inputs.
    MultipleInputs.addInputPath(job, new Path(args[0]),
        TextInputFormat.class, UserJoinMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]),
        TextInputFormat.class, CommentJoinMapper.class);
    job.getConfiguration().set("join.type", args[2]);
    job.setReducerClass(UserJoinReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(args[3]));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    return job.waitForCompletion(true) ? 0 : 2;
}
Example 10
Source File: StormEvaluatorLocalModeTest.java From incubator-retired-mrql with Apache License 2.0

@Override
protected Evaluator createEvaluator() throws Exception {
    Configuration conf = null;
    Config.bsp_mode = false;
    Config.spark_mode = false;
    Config.flink_mode = false;
    Config.storm_mode = true;
    Config.map_reduce_mode = false;
    Evaluator.evaluator = new StormEvaluator();
    Config.quiet_execution = true;
    String[] args = new String[] { "-local", "-storm" };
    conf = Evaluator.evaluator.new_configuration();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    args = gop.getRemainingArgs();
    Config.hadoop_mode = true;
    Config.testing = true;
    Config.parse_args(args, conf);
    Evaluator.evaluator.init(conf);
    return Evaluator.evaluator;
}
Example 11
Source File: Content.java From nutch-htmlunit with Apache License 2.0

public static void main(String argv[]) throws Exception {
    String usage = "Content (-local | -dfs <namenode:port>) recno segment";
    if (argv.length < 3) {
        System.out.println("usage:" + usage);
        return;
    }
    Options opts = new Options();
    Configuration conf = NutchConfiguration.create();
    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
    String[] remainingArgs = parser.getRemainingArgs();
    FileSystem fs = FileSystem.get(conf);
    try {
        int recno = Integer.parseInt(remainingArgs[0]);
        String segment = remainingArgs[1];
        Path file = new Path(segment, DIR_NAME);
        System.out.println("Reading from file: " + file);
        ArrayFile.Reader contents = new ArrayFile.Reader(fs, file.toString(), conf);
        Content content = new Content();
        contents.get(recno, content);
        System.out.println("Retrieved " + recno + " from file " + file);
        System.out.println(content);
        contents.close();
    } finally {
        fs.close();
    }
}
Example 12

Source File: Content.java From anthelion with Apache License 2.0

The main() in this file is identical, line for line, to Example 11 above, so the code is not repeated here.
Example 13
Source File: SimpleRandomSampling.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        printUsage();
    }
    Float filterPercentage = 0.0f;
    try {
        filterPercentage = Float.parseFloat(otherArgs[0]) / 100.0f;
    } catch (NumberFormatException nfe) {
        printUsage();
    }

    Job job = new Job(conf, "Simple Random Sampling");
    job.setJarByClass(SimpleRandomSampling.class);
    job.setMapperClass(SimpleRandomSamplingMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1); // prevent lots of small files
    job.getConfiguration().setFloat(FILTER_PERCENTAGE_KEY, filterPercentage);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Example 14
Source File: BinningTags.java From hadoop-map-reduce-patterns with Apache License 2.0

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: BinningTags <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }
    Job job = new Job(conf, "Binning Tags");
    job.setJarByClass(BinningTags.class);
    // Configure the MultipleOutputs by adding an output called "bins"
    // with the proper output format and mapper key/value pairs.
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class,
        Text.class, NullWritable.class);
    // Enable the counters for the job.
    // If there are a significant number of different named outputs, this
    // should be disabled.
    MultipleOutputs.setCountersEnabled(job, true);
    // Map-only job
    job.setNumReduceTasks(0);
    job.setMapperClass(BinningMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Example 15
Source File: DirectBigQueryWordCount.java From hadoop-connectors with Apache License 2.0

public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException {

    // GenericOptionsParser is a utility to parse command line arguments generic to the Hadoop
    // framework. This example won't cover the specifics, but will recognize several standard
    // command line arguments, enabling applications to easily specify a namenode, a
    // ResourceManager, additional configuration resources, etc.
    GenericOptionsParser parser = new GenericOptionsParser(args);
    args = parser.getRemainingArgs();

    // Make sure we have the right parameters.
    if (args.length != 3) {
        System.out.println(
            "Usage: hadoop jar bigquery_wordcount.jar [ProjectId] [QualifiedInputTableId] "
                + "[GcsOutputPath]\n"
                + "    ProjectId - Project under which to issue the BigQuery operations. Also "
                + "serves as the default project for table IDs which don't explicitly specify a "
                + "project for the table.\n"
                + "    QualifiedInputTableId - Input table ID of the form "
                + "(Optional ProjectId):[DatasetId].[TableId]\n"
                + "    OutputPath - The output path to write data, e.g. "
                + "gs://bucket/dir/");
        System.exit(1);
    }

    // Get the individual parameters from the command line.
    String projectId = args[0];
    String inputQualifiedTableId = args[1];
    String outputPath = args[2];

    // Create the job and get its configuration.
    Job job = new Job(parser.getConfiguration(), "wordcount");
    Configuration conf = job.getConfiguration();

    // Set the job-level projectId.
    conf.set(PROJECT_ID.getKey(), projectId);

    // Configure input and output.
    BigQueryConfiguration.configureBigQueryInput(conf, inputQualifiedTableId);

    // Set column and predicate filters.
    conf.set(SELECTED_FIELDS.getKey(), "word,word_count");
    conf.set(SQL_FILTER.getKey(), "word >= 'A' AND word <= 'zzz'");
    conf.set(MRJobConfig.NUM_MAPS, "999");

    // This helps Hadoop identify the jar which contains the mapper and reducer by specifying
    // a class in that jar. This is required if the jar is being passed on the command line to Hadoop.
    job.setJarByClass(DirectBigQueryWordCount.class);

    // Tell the job what the output will be.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(DirectBigQueryInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.waitForCompletion(true);
}
Example 16
Source File: BSPEvaluatorLocalModeTest.java From incubator-retired-mrql with Apache License 2.0

@Override
protected Evaluator createEvaluator() throws Exception {
    Configuration conf = null;
    Config.bsp_mode = true;
    Config.spark_mode = false;
    Config.map_reduce_mode = false;
    Evaluator.evaluator = new BSPEvaluator();
    Config.quiet_execution = true;
    String[] args = new String[] { "-local", "-bsp" };
    conf = Evaluator.evaluator.new_configuration();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    args = gop.getRemainingArgs();
    Config.hadoop_mode = true;
    Config.testing = true;
    Config.parse_args(args, conf);
    Evaluator.evaluator.init(conf);
    return Evaluator.evaluator;
}
Example 17
Source File: BSPQueryLocalModeTest.java From incubator-retired-mrql with Apache License 2.0

@Override
protected Evaluator createEvaluator() throws Exception {
    Configuration conf = new Configuration();
    Config.bsp_mode = true;
    Config.spark_mode = false;
    Config.map_reduce_mode = false;
    Evaluator.evaluator = new BSPEvaluator();
    Config.quiet_execution = true;
    String[] args = new String[] { "-local", "-bsp" };
    conf = Evaluator.evaluator.new_configuration();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    args = gop.getRemainingArgs();
    Config.hadoop_mode = true;
    Config.testing = true;
    Config.parse_args(args, conf);
    Evaluator.evaluator.init(conf);
    return Evaluator.evaluator;
}
Example 18
Source File: MapReduceQueryLocalModeTest.java From incubator-retired-mrql with Apache License 2.0

@Override
protected Evaluator createEvaluator() throws Exception {
    Configuration conf = new Configuration();
    Config.bsp_mode = false;
    Config.spark_mode = false;
    Config.map_reduce_mode = true;
    Evaluator.evaluator = new MapReduceEvaluator();
    Config.quiet_execution = true;
    String[] args = new String[] { "-local" };
    conf = Evaluator.evaluator.new_configuration();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    args = gop.getRemainingArgs();
    Config.hadoop_mode = true;
    Config.testing = true;
    Config.parse_args(args, conf);
    Evaluator.evaluator.init(conf);
    return Evaluator.evaluator;
}
Example 19
Source File: MapReduceEvaluatorLocalModeTest.java From incubator-retired-mrql with Apache License 2.0

@Override
protected Evaluator createEvaluator() throws Exception {
    Configuration conf = null;
    Config.bsp_mode = false;
    Config.spark_mode = false;
    Config.map_reduce_mode = true;
    Evaluator.evaluator = new MapReduceEvaluator();
    Config.quiet_execution = true;
    String[] args = new String[] { "-local" };
    conf = Evaluator.evaluator.new_configuration();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    args = gop.getRemainingArgs();
    Config.hadoop_mode = true;
    Config.testing = true;
    Config.parse_args(args, conf);
    Evaluator.evaluator.init(conf);
    return Evaluator.evaluator;
}
Example 20
Source File: MyWordCount.java From BigDataArchitect with Apache License 2.0

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration(true);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    // The parser sets -D and other generic properties directly on conf,
    // leaving only the application's own command options.
    String[] othargs = parser.getRemainingArgs();

    // Let the framework know the job is submitted from a heterogeneous (Windows) client.
    conf.set("mapreduce.app-submission.cross-platform", "true");
    // conf.set("mapreduce.framework.name", "local");
    // System.out.println(conf.get("mapreduce.framework.name"));

    Job job = Job.getInstance(conf);
    // FileInputFormat.setMinInputSplitSize(job, 2222);
    // job.setInputFormatClass(ooxx.class);
    job.setJar("C:\\Users\\admin\\IdeaProjects\\msbhadoop\\target\\hadoop-hdfs-1.0-0.1.jar");
    // Required:
    job.setJarByClass(MyWordCount.class);
    job.setJobName("mashibing");

    Path infile = new Path(othargs[0]);
    TextInputFormat.addInputPath(job, infile);
    Path outfile = new Path(othargs[1]);
    if (outfile.getFileSystem(conf).exists(outfile))
        outfile.getFileSystem(conf).delete(outfile, true);
    TextOutputFormat.setOutputPath(job, outfile);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(MyReducer.class);
    // job.setNumReduceTasks(2);

    // Submit the job, then poll for progress until the job is complete.
    job.waitForCompletion(true);
}