org.apache.hadoop.examples.terasort.TeraSort Java Examples
The following examples show how to use
org.apache.hadoop.examples.terasort.TeraSort.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HadoopTeraSortTest.java From ignite with Apache License 2.0 | 6 votes |
/**
 * Extracts package-private TeraSort total order partitioner class.
 * <p>
 * The class is looked up by its simple name among the classes declared inside {@code TeraSort}.
 *
 * @return The class.
 * @throws IllegalStateException If {@code TeraSort} declares no class named {@code TotalOrderPartitioner}.
 * @throws ClassCastException If the class found is not a {@code Partitioner} subtype.
 */
private Class<? extends Partitioner> getTeraSortTotalOrderPartitioner() {
    for (Class<?> nested : TeraSort.class.getDeclaredClasses()) {
        // asSubclass performs a checked narrowing, so a wrong type is reported here
        // instead of leaking out through an unchecked cast.
        if ("TotalOrderPartitioner".equals(nested.getSimpleName()))
            return nested.asSubclass(Partitioner.class);
    }

    throw new IllegalStateException("Failed to find TeraSort total order partitioner class.");
}
Example #2
Source File: ExampleDriver.java From hadoop with Apache License 2.0 | 4 votes |
public static void main(String argv[]){ int exitCode = -1; ProgramDriver pgd = new ProgramDriver(); try { pgd.addClass("wordcount", WordCount.class, "A map/reduce program that counts the words in the input files."); pgd.addClass("wordmean", WordMean.class, "A map/reduce program that counts the average length of the words in the input files."); pgd.addClass("wordmedian", WordMedian.class, "A map/reduce program that counts the median length of the words in the input files."); pgd.addClass("wordstandarddeviation", WordStandardDeviation.class, "A map/reduce program that counts the standard deviation of the length of the words in the input files."); pgd.addClass("aggregatewordcount", AggregateWordCount.class, "An Aggregate based map/reduce program that counts the words in the input files."); pgd.addClass("aggregatewordhist", AggregateWordHistogram.class, "An Aggregate based map/reduce program that computes the histogram of the words in the input files."); pgd.addClass("grep", Grep.class, "A map/reduce program that counts the matches of a regex in the input."); pgd.addClass("randomwriter", RandomWriter.class, "A map/reduce program that writes 10GB of random data per node."); pgd.addClass("randomtextwriter", RandomTextWriter.class, "A map/reduce program that writes 10GB of random textual data per node."); pgd.addClass("sort", Sort.class, "A map/reduce program that sorts the data written by the random writer."); pgd.addClass("pi", QuasiMonteCarlo.class, QuasiMonteCarlo.DESCRIPTION); pgd.addClass("bbp", BaileyBorweinPlouffe.class, BaileyBorweinPlouffe.DESCRIPTION); pgd.addClass("distbbp", DistBbp.class, DistBbp.DESCRIPTION); pgd.addClass("pentomino", DistributedPentomino.class, "A map/reduce tile laying program to find solutions to pentomino problems."); pgd.addClass("secondarysort", SecondarySort.class, "An example defining a secondary sort to the reduce."); pgd.addClass("sudoku", Sudoku.class, "A sudoku solver."); pgd.addClass("join", Join.class, "A job that effects a 
join over sorted, equally partitioned datasets"); pgd.addClass("multifilewc", MultiFileWordCount.class, "A job that counts words from several files."); pgd.addClass("dbcount", DBCountPageView.class, "An example job that count the pageview counts from a database."); pgd.addClass("teragen", TeraGen.class, "Generate data for the terasort"); pgd.addClass("terasort", TeraSort.class, "Run the terasort"); pgd.addClass("teravalidate", TeraValidate.class, "Checking results of terasort"); exitCode = pgd.run(argv); } catch(Throwable e){ e.printStackTrace(); } System.exit(exitCode); }
Example #3
Source File: ExampleDriver.java From big-c with Apache License 2.0 | 4 votes |
public static void main(String argv[]){ int exitCode = -1; ProgramDriver pgd = new ProgramDriver(); try { pgd.addClass("wordcount", WordCount.class, "A map/reduce program that counts the words in the input files."); pgd.addClass("aggregatewordcount", AggregateWordCount.class, "An Aggregate based map/reduce program that counts the words in the input files."); pgd.addClass("aggregatewordhist", AggregateWordHistogram.class, "An Aggregate based map/reduce program that computes the histogram of the words in the input files."); pgd.addClass("grep", Grep.class, "A map/reduce program that counts the matches of a regex in the input."); pgd.addClass("randomwriter", RandomWriter.class, "A map/reduce program that writes 10GB of random data per node."); pgd.addClass("randomtextwriter", RandomTextWriter.class, "A map/reduce program that writes 10GB of random textual data per node."); pgd.addClass("sort", Sort.class, "A map/reduce program that sorts the data written by the random writer."); pgd.addClass("pi", PiEstimator.class, "A map/reduce program that estimates Pi using monte-carlo method."); pgd.addClass("pentomino", DistributedPentomino.class, "A map/reduce tile laying program to find solutions to pentomino problems."); pgd.addClass("secondarysort", SecondarySort.class, "An example defining a secondary sort to the reduce."); pgd.addClass("sudoku", Sudoku.class, "A sudoku solver."); pgd.addClass("sleep", SleepJob.class, "A job that sleeps at each map and reduce task."); pgd.addClass("join", Join.class, "A job that effects a join over sorted, equally partitioned datasets"); pgd.addClass("multifilewc", MultiFileWordCount.class, "A job that counts words from several files."); pgd.addClass("dbcount", DBCountPageView.class, "An example job that count the pageview counts from a database."); pgd.addClass("teragen", TeraGen.class, "Generate data for the terasort"); pgd.addClass("terasort", TeraSort.class, "Run the terasort"); pgd.addClass("teravalidate", TeraValidate.class, "Checking 
results of terasort"); pgd.addClass("kmeans", Kmeans.class, "Kmeans on movies data"); pgd.addClass("classification", Classification.class, "Classify movies into clusters"); pgd.addClass("histogram_movies", HistogramMovies.class, "A map/reduce program that gives a histogram of movies based on ratings."); pgd.addClass("histogram_ratings", HistogramRatings.class, "A map/reduce program that gives a histogram of users ratings on movies."); pgd.addClass("selfjoin", SelfJoin.class, "A map/reduce program that creates k+1 associations given set of k-field associations"); pgd.addClass("invertedindex", InvertedIndex.class, "A map/reduce program that creates an inverted index of documents."); pgd.addClass("adjlist", AdjList.class, "A map/reduce program that finds adjacency list of graph nodes."); pgd.addClass("termvectorperhost", TermVectorPerHost.class, "A map/reduce program that creates the term-vectors (frequency of words) per document."); pgd.addClass("sequencecount", SequenceCount.class, "A map/reduce program that counts the occurrence of consecutive words in the input files."); pgd.addClass("rankedinvertedindex", RankedInvertedIndex.class, "A map/reduce program that creates the top k document lists per word"); pgd.driver(argv); // Success exitCode = 0; } catch(Throwable e){ e.printStackTrace(); } System.exit(exitCode); }
Example #4
Source File: HadoopTeraSortTest.java From ignite with Apache License 2.0 | 4 votes |
/**
 * Builds the TeraSort {@link Job} for the given configuration and fills in its
 * input/output locations, formats, key/value classes and partitioner.
 * <p>
 * When the simple partitioner is not requested, a partition file is written under
 * the output directory, registered as a cache file, and the total order partitioner
 * is installed instead.
 *
 * @param conf The Job config.
 * @return The job.
 * @throws Exception On error.
 */
private Job setupConfig(JobConf conf) throws Exception {
    Job sortJob = Job.getInstance(conf);

    Path srcDir = new Path(generateOutDir);
    Path dstDir = new Path(sortOutDir);

    boolean simplePartitioner = TeraSort.getUseSimplePartitioner(sortJob);

    TeraInputFormat.setInputPaths(sortJob, srcDir);
    FileOutputFormat.setOutputPath(sortJob, dstDir);

    sortJob.setJobName("TeraSort");

    sortJob.setOutputKeyClass(Text.class);
    sortJob.setOutputValueClass(Text.class);

    sortJob.setInputFormatClass(TeraInputFormat.class);
    sortJob.setOutputFormatClass(TeraOutputFormat.class);

    if (!simplePartitioner) {
        long startedAt = System.currentTimeMillis();

        Path partitionFile = new Path(dstDir, PARTITION_FILENAME);
        URI partitionUri = new URI(partitionFile.toString() + "#" + PARTITION_FILENAME);

        try {
            TeraInputFormat.writePartitionFile(sortJob, partitionFile);
        }
        catch (Throwable e) {
            // Any failure while writing the partition file surfaces as an unchecked exception.
            throw new RuntimeException(e);
        }

        sortJob.addCacheFile(partitionUri);

        long finishedAt = System.currentTimeMillis();

        System.out.println("Spent " + (finishedAt - startedAt) + "ms computing partitions. " +
            "Partition file added to distributed cache: " + partitionUri);

        // Stands in for a direct TeraSort.TotalOrderPartitioner.class reference;
        // the class is resolved through a helper instead.
        sortJob.setPartitionerClass(getTeraSortTotalOrderPartitioner());
    }
    else {
        sortJob.setPartitionerClass(TeraSort.SimplePartitioner.class);
    }

    sortJob.getConfiguration().setInt("dfs.replication", TeraSort.getOutputReplication(sortJob));

    // Reflective equivalent of TeraOutputFormat.setFinalSync(job, true); setAccessible is
    // needed, so the method is presumably not public -- verify against TeraOutputFormat.
    Method finalSyncSetter =
        TeraOutputFormat.class.getDeclaredMethod("setFinalSync", JobContext.class, boolean.class);

    finalSyncSetter.setAccessible(true);
    finalSyncSetter.invoke(null, sortJob, true);

    return sortJob;
}
Example #5
Source File: ExampleDriver.java From RDFS with Apache License 2.0 | 4 votes |
public static void main(String argv[]){ int exitCode = -1; ProgramDriver pgd = new ProgramDriver(); try { pgd.addClass("wordcount", WordCount.class, "A map/reduce program that counts the words in the input files."); pgd.addClass("aggregatewordcount", AggregateWordCount.class, "An Aggregate based map/reduce program that counts the words in the input files."); pgd.addClass("aggregatewordhist", AggregateWordHistogram.class, "An Aggregate based map/reduce program that computes the histogram of the words in the input files."); pgd.addClass("grep", Grep.class, "A map/reduce program that counts the matches of a regex in the input."); pgd.addClass("randomwriter", RandomWriter.class, "A map/reduce program that writes 10GB of random data per node."); pgd.addClass("randomtextwriter", RandomTextWriter.class, "A map/reduce program that writes 10GB of random textual data per node."); pgd.addClass("sort", Sort.class, "A map/reduce program that sorts the data written by the random writer."); pgd.addClass("pi", PiEstimator.class, "A map/reduce program that estimates Pi using monte-carlo method."); pgd.addClass("pentomino", DistributedPentomino.class, "A map/reduce tile laying program to find solutions to pentomino problems."); pgd.addClass("secondarysort", SecondarySort.class, "An example defining a secondary sort to the reduce."); pgd.addClass("sudoku", Sudoku.class, "A sudoku solver."); pgd.addClass("sleep", SleepJob.class, "A job that sleeps at each map and reduce task."); pgd.addClass("join", Join.class, "A job that effects a join over sorted, equally partitioned datasets"); pgd.addClass("multifilewc", MultiFileWordCount.class, "A job that counts words from several files."); pgd.addClass("dbcount", DBCountPageView.class, "An example job that count the pageview counts from a database."); pgd.addClass("teragen", TeraGen.class, "Generate data for the terasort"); pgd.addClass("terasort", TeraSort.class, "Run the terasort"); pgd.addClass("teravalidate", TeraValidate.class, "Checking 
results of terasort"); pgd.driver(argv); // Success exitCode = 0; } catch(Throwable e){ e.printStackTrace(); } System.exit(exitCode); }
Example #6
Source File: ExampleDriver.java From hadoop-gpu with Apache License 2.0 | 4 votes |
public static void main(String argv[]){ int exitCode = -1; ProgramDriver pgd = new ProgramDriver(); try { pgd.addClass("wordcount", WordCount.class, "A map/reduce program that counts the words in the input files."); pgd.addClass("aggregatewordcount", AggregateWordCount.class, "An Aggregate based map/reduce program that counts the words in the input files."); pgd.addClass("aggregatewordhist", AggregateWordHistogram.class, "An Aggregate based map/reduce program that computes the histogram of the words in the input files."); pgd.addClass("grep", Grep.class, "A map/reduce program that counts the matches of a regex in the input."); pgd.addClass("randomwriter", RandomWriter.class, "A map/reduce program that writes 10GB of random data per node."); pgd.addClass("randomtextwriter", RandomTextWriter.class, "A map/reduce program that writes 10GB of random textual data per node."); pgd.addClass("sort", Sort.class, "A map/reduce program that sorts the data written by the random writer."); pgd.addClass("pi", PiEstimator.class, "A map/reduce program that estimates Pi using monte-carlo method."); pgd.addClass("pentomino", DistributedPentomino.class, "A map/reduce tile laying program to find solutions to pentomino problems."); pgd.addClass("secondarysort", SecondarySort.class, "An example defining a secondary sort to the reduce."); pgd.addClass("sudoku", Sudoku.class, "A sudoku solver."); pgd.addClass("sleep", SleepJob.class, "A job that sleeps at each map and reduce task."); pgd.addClass("join", Join.class, "A job that effects a join over sorted, equally partitioned datasets"); pgd.addClass("multifilewc", MultiFileWordCount.class, "A job that counts words from several files."); pgd.addClass("dbcount", DBCountPageView.class, "An example job that count the pageview counts from a database."); pgd.addClass("teragen", TeraGen.class, "Generate data for the terasort"); pgd.addClass("terasort", TeraSort.class, "Run the terasort"); pgd.addClass("teravalidate", TeraValidate.class, "Checking 
results of terasort"); pgd.driver(argv); // Success exitCode = 0; } catch(Throwable e){ e.printStackTrace(); } System.exit(exitCode); }