Java Code Examples for org.apache.hadoop.examples.terasort.TeraSort

The following examples show how to use org.apache.hadoop.examples.terasort.TeraSort. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: ignite   Source File: HadoopTeraSortTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the nested class of {@link TeraSort} whose simple name is
 * {@code TotalOrderPartitioner} by scanning the classes declared inside {@code TeraSort}.
 *
 * @return The first declared nested class with that simple name.
 * @throws IllegalStateException If no nested class with that simple name is declared.
 */
private Class<? extends Partitioner> getTeraSortTotalOrderPartitioner() {
    for (Class<?> nested : TeraSort.class.getDeclaredClasses()) {
        // The class is matched by name only; the cast below is not checked at runtime.
        if ("TotalOrderPartitioner".equals(nested.getSimpleName()))
            return (Class<? extends Partitioner>)nested;
    }

    throw new IllegalStateException("Failed to find TeraSort total order partitioner class.");
}
 
Example 2
Source Project: hadoop   Source File: ExampleDriver.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Adds every example class to a {@code ProgramDriver} under a short name with a
 * description, passes {@code argv} to {@code ProgramDriver.run} and exits the JVM.
 *
 * The process exit code is the value returned by {@code run}, or -1 when anything
 * is thrown while adding the classes or running (the stack trace is printed).
 *
 * @param argv command-line arguments, forwarded unchanged to the driver
 */
public static void main(String[] argv) {
  // Remains -1 unless run() returns normally.
  int status = -1;
  ProgramDriver driver = new ProgramDriver();

  try {
    // Word statistics.
    driver.addClass(
        "wordcount",
        WordCount.class,
        "A map/reduce program that counts the words in the input files.");
    driver.addClass(
        "wordmean",
        WordMean.class,
        "A map/reduce program that counts the average length of the words in the input files.");
    driver.addClass(
        "wordmedian",
        WordMedian.class,
        "A map/reduce program that counts the median length of the words in the input files.");
    driver.addClass(
        "wordstandarddeviation",
        WordStandardDeviation.class,
        "A map/reduce program that counts the standard deviation of the length of the words in the input files.");
    driver.addClass(
        "aggregatewordcount",
        AggregateWordCount.class,
        "An Aggregate based map/reduce program that counts the words in the input files.");
    driver.addClass(
        "aggregatewordhist",
        AggregateWordHistogram.class,
        "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");

    // Searching, data generation and sorting.
    driver.addClass(
        "grep",
        Grep.class,
        "A map/reduce program that counts the matches of a regex in the input.");
    driver.addClass(
        "randomwriter",
        RandomWriter.class,
        "A map/reduce program that writes 10GB of random data per node.");
    driver.addClass(
        "randomtextwriter",
        RandomTextWriter.class,
        "A map/reduce program that writes 10GB of random textual data per node.");
    driver.addClass(
        "sort",
        Sort.class,
        "A map/reduce program that sorts the data written by the random writer.");

    // These three take their description from a constant on the example class itself.
    driver.addClass("pi", QuasiMonteCarlo.class, QuasiMonteCarlo.DESCRIPTION);
    driver.addClass("bbp", BaileyBorweinPlouffe.class, BaileyBorweinPlouffe.DESCRIPTION);
    driver.addClass("distbbp", DistBbp.class, DistBbp.DESCRIPTION);

    // Puzzles, joins and miscellaneous jobs.
    driver.addClass(
        "pentomino",
        DistributedPentomino.class,
        "A map/reduce tile laying program to find solutions to pentomino problems.");
    driver.addClass(
        "secondarysort",
        SecondarySort.class,
        "An example defining a secondary sort to the reduce.");
    driver.addClass(
        "sudoku",
        Sudoku.class,
        "A sudoku solver.");
    driver.addClass(
        "join",
        Join.class,
        "A job that effects a join over sorted, equally partitioned datasets");
    driver.addClass(
        "multifilewc",
        MultiFileWordCount.class,
        "A job that counts words from several files.");
    driver.addClass(
        "dbcount",
        DBCountPageView.class,
        "An example job that count the pageview counts from a database.");

    // TeraSort suite.
    driver.addClass(
        "teragen",
        TeraGen.class,
        "Generate data for the terasort");
    driver.addClass(
        "terasort",
        TeraSort.class,
        "Run the terasort");
    driver.addClass(
        "teravalidate",
        TeraValidate.class,
        "Checking results of terasort");

    status = driver.run(argv);
  } catch (Throwable failure) {
    failure.printStackTrace();
  }

  System.exit(status);
}
 
Example 3
Source Project: big-c   Source File: ExampleDriver.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Adds every example class to a {@code ProgramDriver} under a short name with a
 * description, passes {@code argv} to {@code ProgramDriver.driver} and exits the JVM.
 *
 * The process exits with 0 when {@code driver} returns normally and with -1 when
 * anything is thrown while adding the classes or running (the stack trace is printed).
 *
 * @param argv command-line arguments, forwarded unchanged to the driver
 */
public static void main(String[] argv) {
  // Pessimistic default; only flipped to 0 after driver() returns without throwing.
  int status = -1;

  ProgramDriver programs = new ProgramDriver();

  try {
    programs.addClass(
        "wordcount",
        WordCount.class,
        "A map/reduce program that counts the words in the input files.");
    programs.addClass(
        "aggregatewordcount",
        AggregateWordCount.class,
        "An Aggregate based map/reduce program that counts the words in the input files.");
    programs.addClass(
        "aggregatewordhist",
        AggregateWordHistogram.class,
        "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");
    programs.addClass(
        "grep",
        Grep.class,
        "A map/reduce program that counts the matches of a regex in the input.");
    programs.addClass(
        "randomwriter",
        RandomWriter.class,
        "A map/reduce program that writes 10GB of random data per node.");
    programs.addClass(
        "randomtextwriter",
        RandomTextWriter.class,
        "A map/reduce program that writes 10GB of random textual data per node.");
    programs.addClass(
        "sort",
        Sort.class,
        "A map/reduce program that sorts the data written by the random writer.");
    programs.addClass(
        "pi",
        PiEstimator.class,
        "A map/reduce program that estimates Pi using monte-carlo method.");
    programs.addClass(
        "pentomino",
        DistributedPentomino.class,
        "A map/reduce tile laying program to find solutions to pentomino problems.");
    programs.addClass(
        "secondarysort",
        SecondarySort.class,
        "An example defining a secondary sort to the reduce.");
    programs.addClass(
        "sudoku",
        Sudoku.class,
        "A sudoku solver.");
    programs.addClass(
        "sleep",
        SleepJob.class,
        "A job that sleeps at each map and reduce task.");
    programs.addClass(
        "join",
        Join.class,
        "A job that effects a join over sorted, equally partitioned datasets");
    programs.addClass(
        "multifilewc",
        MultiFileWordCount.class,
        "A job that counts words from several files.");
    programs.addClass(
        "dbcount",
        DBCountPageView.class,
        "An example job that count the pageview counts from a database.");

    // TeraSort suite.
    programs.addClass(
        "teragen",
        TeraGen.class,
        "Generate data for the terasort");
    programs.addClass(
        "terasort",
        TeraSort.class,
        "Run the terasort");
    programs.addClass(
        "teravalidate",
        TeraValidate.class,
        "Checking results of terasort");

    // Movie-data and text-indexing examples.
    programs.addClass(
        "kmeans",
        Kmeans.class,
        "Kmeans on movies data");
    programs.addClass(
        "classification",
        Classification.class,
        "Classify movies into clusters");
    programs.addClass(
        "histogram_movies",
        HistogramMovies.class,
        "A map/reduce program that gives a histogram of movies based on ratings.");
    programs.addClass(
        "histogram_ratings",
        HistogramRatings.class,
        "A map/reduce program that gives a histogram of users ratings on movies.");
    programs.addClass(
        "selfjoin",
        SelfJoin.class,
        "A map/reduce program that creates k+1 associations given set of k-field associations");
    programs.addClass(
        "invertedindex",
        InvertedIndex.class,
        "A map/reduce program that creates an inverted index of documents.");
    programs.addClass(
        "adjlist",
        AdjList.class,
        "A map/reduce program that finds adjacency list of graph nodes.");
    programs.addClass(
        "termvectorperhost",
        TermVectorPerHost.class,
        "A map/reduce program that creates the term-vectors (frequency of words) per document.");
    programs.addClass(
        "sequencecount",
        SequenceCount.class,
        "A map/reduce program that counts the occurrence of consecutive words in the input files.");
    programs.addClass(
        "rankedinvertedindex",
        RankedInvertedIndex.class,
        "A map/reduce program that creates the top k document lists per word");

    programs.driver(argv);

    // Reaching this line means nothing was thrown above.
    status = 0;
  } catch (Throwable failure) {
    failure.printStackTrace();
  }

  System.exit(status);
}
 
Example 4
Source Project: ignite   Source File: HadoopTeraSortTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds a {@link Job} from the given configuration and prepares it for a TeraSort run:
 * input is read from {@code generateOutDir}, output goes to {@code sortOutDir}, keys and
 * values are {@code Text}, and the Tera input/output formats are used.
 * <p>
 * The partitioner depends on {@code TeraSort.getUseSimplePartitioner(job)}: either
 * {@code TeraSort.SimplePartitioner}, or the total order partitioner, in which case a
 * partition file is first written under the output directory and added as a cache file.
 *
 * @param conf The Job config.
 * @return The job.
 * @throws Exception On error.
 */
private Job setupConfig(JobConf conf) throws Exception {
    Job sortJob = Job.getInstance(conf);

    Path srcDir = new Path(generateOutDir);
    Path dstDir = new Path(sortOutDir);

    boolean simplePartitioning = TeraSort.getUseSimplePartitioner(sortJob);

    TeraInputFormat.setInputPaths(sortJob, srcDir);
    FileOutputFormat.setOutputPath(sortJob, dstDir);

    sortJob.setJobName("TeraSort");

    sortJob.setOutputKeyClass(Text.class);
    sortJob.setOutputValueClass(Text.class);

    sortJob.setInputFormatClass(TeraInputFormat.class);
    sortJob.setOutputFormatClass(TeraOutputFormat.class);

    if (!simplePartitioning) {
        long startTs = System.currentTimeMillis();

        Path partFile = new Path(dstDir, PARTITION_FILENAME);

        // The URI fragment repeats the partition file name.
        URI partUri = new URI(partFile.toString() + "#" + PARTITION_FILENAME);

        try {
            TeraInputFormat.writePartitionFile(sortJob, partFile);
        }
        catch (Throwable e) {
            // Any failure while writing the partition file is surfaced as unchecked.
            throw new RuntimeException(e);
        }

        sortJob.addCacheFile(partUri);

        long elapsedMs = System.currentTimeMillis() - startTs;

        System.out.println("Spent " + elapsedMs + "ms computing partitions. " +
            "Partition file added to distributed cache: " + partUri);

        // TeraSort.TotalOrderPartitioner is resolved through a helper instead of a class literal.
        sortJob.setPartitionerClass(getTeraSortTotalOrderPartitioner());
    }
    else
        sortJob.setPartitionerClass(TeraSort.SimplePartitioner.class);

    sortJob.getConfiguration().setInt("dfs.replication", TeraSort.getOutputReplication(sortJob));

    // Equivalent of TeraOutputFormat.setFinalSync(job, true), invoked reflectively --
    // presumably because the method is not accessible from this class.
    Method finalSyncSetter =
        TeraOutputFormat.class.getDeclaredMethod("setFinalSync", JobContext.class, boolean.class);

    finalSyncSetter.setAccessible(true);
    finalSyncSetter.invoke(null, sortJob, true);

    return sortJob;
}
 
Example 5
Source Project: RDFS   Source File: ExampleDriver.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Adds every example class to a {@code ProgramDriver} under a short name with a
 * description, passes {@code argv} to {@code ProgramDriver.driver} and exits the JVM.
 *
 * Exit code is 0 if {@code driver} returns normally, otherwise -1 (the stack trace
 * of whatever was thrown is printed first).
 *
 * @param argv command-line arguments, forwarded unchanged to the driver
 */
public static void main(String[] argv) {
  // Assume failure until driver() has returned without throwing.
  int rc = -1;
  ProgramDriver examples = new ProgramDriver();

  try {
    examples.addClass(
        "wordcount",
        WordCount.class,
        "A map/reduce program that counts the words in the input files.");
    examples.addClass(
        "aggregatewordcount",
        AggregateWordCount.class,
        "An Aggregate based map/reduce program that counts the words in the input files.");
    examples.addClass(
        "aggregatewordhist",
        AggregateWordHistogram.class,
        "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");
    examples.addClass(
        "grep",
        Grep.class,
        "A map/reduce program that counts the matches of a regex in the input.");
    examples.addClass(
        "randomwriter",
        RandomWriter.class,
        "A map/reduce program that writes 10GB of random data per node.");
    examples.addClass(
        "randomtextwriter",
        RandomTextWriter.class,
        "A map/reduce program that writes 10GB of random textual data per node.");
    examples.addClass(
        "sort",
        Sort.class,
        "A map/reduce program that sorts the data written by the random writer.");
    examples.addClass(
        "pi",
        PiEstimator.class,
        "A map/reduce program that estimates Pi using monte-carlo method.");
    examples.addClass(
        "pentomino",
        DistributedPentomino.class,
        "A map/reduce tile laying program to find solutions to pentomino problems.");
    examples.addClass(
        "secondarysort",
        SecondarySort.class,
        "An example defining a secondary sort to the reduce.");
    examples.addClass(
        "sudoku",
        Sudoku.class,
        "A sudoku solver.");
    examples.addClass(
        "sleep",
        SleepJob.class,
        "A job that sleeps at each map and reduce task.");
    examples.addClass(
        "join",
        Join.class,
        "A job that effects a join over sorted, equally partitioned datasets");
    examples.addClass(
        "multifilewc",
        MultiFileWordCount.class,
        "A job that counts words from several files.");
    examples.addClass(
        "dbcount",
        DBCountPageView.class,
        "An example job that count the pageview counts from a database.");

    // TeraSort suite.
    examples.addClass(
        "teragen",
        TeraGen.class,
        "Generate data for the terasort");
    examples.addClass(
        "terasort",
        TeraSort.class,
        "Run the terasort");
    examples.addClass(
        "teravalidate",
        TeraValidate.class,
        "Checking results of terasort");

    examples.driver(argv);

    // No exception so far: report success.
    rc = 0;
  } catch (Throwable problem) {
    problem.printStackTrace();
  }

  System.exit(rc);
}
 
Example 6
Source Project: hadoop-gpu   Source File: ExampleDriver.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Entry point: fills a {@code ProgramDriver} with the example classes (each added
 * under a short name plus a description), calls {@code ProgramDriver.driver} with
 * {@code argv}, and terminates the JVM.
 *
 * The JVM exits with 0 when {@code driver} completes without throwing; any
 * {@code Throwable} is printed and the exit code stays -1.
 *
 * @param argv command-line arguments, forwarded unchanged to the driver
 */
public static void main(String[] argv) {
  // Only set to 0 once driver() has completed.
  int result = -1;
  ProgramDriver registry = new ProgramDriver();

  try {
    registry.addClass(
        "wordcount",
        WordCount.class,
        "A map/reduce program that counts the words in the input files.");
    registry.addClass(
        "aggregatewordcount",
        AggregateWordCount.class,
        "An Aggregate based map/reduce program that counts the words in the input files.");
    registry.addClass(
        "aggregatewordhist",
        AggregateWordHistogram.class,
        "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");
    registry.addClass(
        "grep",
        Grep.class,
        "A map/reduce program that counts the matches of a regex in the input.");
    registry.addClass(
        "randomwriter",
        RandomWriter.class,
        "A map/reduce program that writes 10GB of random data per node.");
    registry.addClass(
        "randomtextwriter",
        RandomTextWriter.class,
        "A map/reduce program that writes 10GB of random textual data per node.");
    registry.addClass(
        "sort",
        Sort.class,
        "A map/reduce program that sorts the data written by the random writer.");
    registry.addClass(
        "pi",
        PiEstimator.class,
        "A map/reduce program that estimates Pi using monte-carlo method.");
    registry.addClass(
        "pentomino",
        DistributedPentomino.class,
        "A map/reduce tile laying program to find solutions to pentomino problems.");
    registry.addClass(
        "secondarysort",
        SecondarySort.class,
        "An example defining a secondary sort to the reduce.");
    registry.addClass(
        "sudoku",
        Sudoku.class,
        "A sudoku solver.");
    registry.addClass(
        "sleep",
        SleepJob.class,
        "A job that sleeps at each map and reduce task.");
    registry.addClass(
        "join",
        Join.class,
        "A job that effects a join over sorted, equally partitioned datasets");
    registry.addClass(
        "multifilewc",
        MultiFileWordCount.class,
        "A job that counts words from several files.");
    registry.addClass(
        "dbcount",
        DBCountPageView.class,
        "An example job that count the pageview counts from a database.");

    // TeraSort suite.
    registry.addClass(
        "teragen",
        TeraGen.class,
        "Generate data for the terasort");
    registry.addClass(
        "terasort",
        TeraSort.class,
        "Run the terasort");
    registry.addClass(
        "teravalidate",
        TeraValidate.class,
        "Checking results of terasort");

    registry.driver(argv);

    // Nothing was thrown: mark the run as successful.
    result = 0;
  } catch (Throwable thrown) {
    thrown.printStackTrace();
  }

  System.exit(result);
}