org.apache.hadoop.examples.terasort.TeraSort Java Examples

Source File: HadoopTeraSortTest.java From ignite with Apache License 2.0

6 votes

/**
 * Reflectively locates the package-private total order partitioner class
 * declared inside {@link TeraSort}, matching it by its simple name.
 *
 * @return The partitioner class.
 * @throws IllegalStateException If {@link TeraSort} declares no class with that simple name.
 */
private Class<? extends Partitioner> getTeraSortTotalOrderPartitioner() {
    // The class cannot be referenced directly from here, so scan TeraSort's declared classes.
    for (Class<?> nested : TeraSort.class.getDeclaredClasses()) {
        if ("TotalOrderPartitioner".equals(nested.getSimpleName()))
            return (Class<? extends Partitioner>)nested;
    }

    throw new IllegalStateException("Failed to find TeraSort total order partitioner class.");
}

Source File: ExampleDriver.java From hadoop with Apache License 2.0

4 votes

/**
 * Registers the example programs with a {@link ProgramDriver} under short
 * names, passes {@code argv} to {@link ProgramDriver#run} and terminates the
 * JVM with the value it returns. If anything inside the registration/run
 * sequence throws, the stack trace is printed and the exit status stays -1.
 *
 * @param argv command-line arguments handed to the driver unchanged
 */
public static void main(String[] argv) {
  final ProgramDriver driver = new ProgramDriver();
  // Pessimistic default: only overwritten once run() has returned.
  int status = -1;

  try {
    // Word statistics examples.
    driver.addClass("wordcount", WordCount.class,
        "A map/reduce program that counts the words in the input files.");
    driver.addClass("wordmean", WordMean.class,
        "A map/reduce program that counts the average length of the words in the input files.");
    driver.addClass("wordmedian", WordMedian.class,
        "A map/reduce program that counts the median length of the words in the input files.");
    driver.addClass("wordstandarddeviation", WordStandardDeviation.class,
        "A map/reduce program that counts the standard deviation of the length of the words in the input files.");
    driver.addClass("aggregatewordcount", AggregateWordCount.class,
        "An Aggregate based map/reduce program that counts the words in the input files.");
    driver.addClass("aggregatewordhist", AggregateWordHistogram.class,
        "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");
    driver.addClass("grep", Grep.class,
        "A map/reduce program that counts the matches of a regex in the input.");

    // Random data generation and sorting.
    driver.addClass("randomwriter", RandomWriter.class,
        "A map/reduce program that writes 10GB of random data per node.");
    driver.addClass("randomtextwriter", RandomTextWriter.class,
        "A map/reduce program that writes 10GB of random textual data per node.");
    driver.addClass("sort", Sort.class,
        "A map/reduce program that sorts the data written by the random writer.");

    // Pi computation examples; descriptions come from the classes themselves.
    driver.addClass("pi", QuasiMonteCarlo.class, QuasiMonteCarlo.DESCRIPTION);
    driver.addClass("bbp", BaileyBorweinPlouffe.class, BaileyBorweinPlouffe.DESCRIPTION);
    driver.addClass("distbbp", DistBbp.class, DistBbp.DESCRIPTION);

    // Miscellaneous examples.
    driver.addClass("pentomino", DistributedPentomino.class,
        "A map/reduce tile laying program to find solutions to pentomino problems.");
    driver.addClass("secondarysort", SecondarySort.class,
        "An example defining a secondary sort to the reduce.");
    driver.addClass("sudoku", Sudoku.class,
        "A sudoku solver.");
    driver.addClass("join", Join.class,
        "A job that effects a join over sorted, equally partitioned datasets");
    driver.addClass("multifilewc", MultiFileWordCount.class,
        "A job that counts words from several files.");
    driver.addClass("dbcount", DBCountPageView.class,
        "An example job that count the pageview counts from a database.");

    // TeraSort suite.
    driver.addClass("teragen", TeraGen.class,
        "Generate data for the terasort");
    driver.addClass("terasort", TeraSort.class,
        "Run the terasort");
    driver.addClass("teravalidate", TeraValidate.class,
        "Checking results of terasort");

    status = driver.run(argv);
  } catch (Throwable failure) {
    failure.printStackTrace();
  }

  System.exit(status);
}

Source File: ExampleDriver.java From big-c with Apache License 2.0

4 votes

/**
 * Registers the example programs with a {@link ProgramDriver} under short
 * names and passes {@code argv} to {@link ProgramDriver#driver}. The JVM is
 * terminated with status 0 when that call returns normally; if anything in
 * the sequence throws, the stack trace is printed and the status is -1.
 *
 * @param argv command-line arguments handed to the driver unchanged
 */
public static void main(String[] argv) {
  final ProgramDriver driver = new ProgramDriver();
  // Pessimistic default: flipped to 0 only after driver() has returned.
  int status = -1;

  try {
    // Stock examples.
    driver.addClass("wordcount", WordCount.class,
        "A map/reduce program that counts the words in the input files.");
    driver.addClass("aggregatewordcount", AggregateWordCount.class,
        "An Aggregate based map/reduce program that counts the words in the input files.");
    driver.addClass("aggregatewordhist", AggregateWordHistogram.class,
        "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");
    driver.addClass("grep", Grep.class,
        "A map/reduce program that counts the matches of a regex in the input.");
    driver.addClass("randomwriter", RandomWriter.class,
        "A map/reduce program that writes 10GB of random data per node.");
    driver.addClass("randomtextwriter", RandomTextWriter.class,
        "A map/reduce program that writes 10GB of random textual data per node.");
    driver.addClass("sort", Sort.class,
        "A map/reduce program that sorts the data written by the random writer.");
    driver.addClass("pi", PiEstimator.class,
        "A map/reduce program that estimates Pi using monte-carlo method.");
    driver.addClass("pentomino", DistributedPentomino.class,
        "A map/reduce tile laying program to find solutions to pentomino problems.");
    driver.addClass("secondarysort", SecondarySort.class,
        "An example defining a secondary sort to the reduce.");
    driver.addClass("sudoku", Sudoku.class,
        "A sudoku solver.");
    driver.addClass("sleep", SleepJob.class,
        "A job that sleeps at each map and reduce task.");
    driver.addClass("join", Join.class,
        "A job that effects a join over sorted, equally partitioned datasets");
    driver.addClass("multifilewc", MultiFileWordCount.class,
        "A job that counts words from several files.");
    driver.addClass("dbcount", DBCountPageView.class,
        "An example job that count the pageview counts from a database.");

    // TeraSort suite.
    driver.addClass("teragen", TeraGen.class,
        "Generate data for the terasort");
    driver.addClass("terasort", TeraSort.class,
        "Run the terasort");
    driver.addClass("teravalidate", TeraValidate.class,
        "Checking results of terasort");

    // Additional examples registered by this driver.
    driver.addClass("kmeans", Kmeans.class,
        "Kmeans on movies data");
    driver.addClass("classification", Classification.class,
        "Classify movies into clusters");
    driver.addClass("histogram_movies", HistogramMovies.class,
        "A map/reduce program that gives a histogram of movies based on ratings.");
    driver.addClass("histogram_ratings", HistogramRatings.class,
        "A map/reduce program that gives a histogram of users ratings on movies.");
    driver.addClass("selfjoin", SelfJoin.class,
        "A map/reduce program that creates k+1 associations given set of k-field associations");
    driver.addClass("invertedindex", InvertedIndex.class,
        "A map/reduce program that creates an inverted index of documents.");
    driver.addClass("adjlist", AdjList.class,
        "A map/reduce program that finds adjacency list of graph nodes.");
    driver.addClass("termvectorperhost", TermVectorPerHost.class,
        "A map/reduce program that creates the term-vectors (frequency of words) per document.");
    driver.addClass("sequencecount", SequenceCount.class,
        "A map/reduce program that counts the occurrence of consecutive words in the input files.");
    driver.addClass("rankedinvertedindex", RankedInvertedIndex.class,
        "A map/reduce program that creates the top k document lists per word");

    driver.driver(argv);

    // Reaching this point means driver() did not throw.
    status = 0;
  } catch (Throwable failure) {
    failure.printStackTrace();
  }

  System.exit(status);
}

Source File: HadoopTeraSortTest.java From ignite with Apache License 2.0

4 votes

/**
 * Builds a {@link Job} named "TeraSort" from the supplied configuration:
 * wires the input/output directories and formats, sets {@link Text} as the
 * output key and value classes, picks the partitioner (writing and caching a
 * partition file when the simple partitioner is not requested), copies the
 * output replication into {@code dfs.replication} and turns on final sync
 * for the output format.
 *
 * @param conf The Job config.
 * @return The configured job.
 * @throws Exception On error.
 */
private Job setupConfig(JobConf conf) throws Exception {
    Job teraJob = Job.getInstance(conf);

    Path srcDir = new Path(generateOutDir);
    Path dstDir = new Path(sortOutDir);

    boolean simplePartitioning = TeraSort.getUseSimplePartitioner(teraJob);

    TeraInputFormat.setInputPaths(teraJob, srcDir);
    FileOutputFormat.setOutputPath(teraJob, dstDir);

    teraJob.setJobName("TeraSort");

    teraJob.setOutputKeyClass(Text.class);
    teraJob.setOutputValueClass(Text.class);

    teraJob.setInputFormatClass(TeraInputFormat.class);
    teraJob.setOutputFormatClass(TeraOutputFormat.class);

    if (!simplePartitioning) {
        long startedAt = System.currentTimeMillis();

        Path partitionPath = new Path(dstDir, PARTITION_FILENAME);

        // The fragment after '#' repeats the partition file name.
        URI partitionUri = new URI(partitionPath.toString() + "#" + PARTITION_FILENAME);

        try {
            TeraInputFormat.writePartitionFile(teraJob, partitionPath);
        } catch (Throwable err) {
            // Anything thrown while writing the partition file is rethrown unchecked with its cause.
            throw new RuntimeException(err);
        }

        teraJob.addCacheFile(partitionUri);

        long elapsedMs = System.currentTimeMillis() - startedAt;

        System.out.println("Spent " + elapsedMs + "ms computing partitions. " +
            "Partition file added to distributed cache: " + partitionUri);

        // Stands in for TeraSort.TotalOrderPartitioner.class, which is looked up reflectively.
        teraJob.setPartitionerClass(getTeraSortTotalOrderPartitioner());
    }
    else
        teraJob.setPartitionerClass(TeraSort.SimplePartitioner.class);

    teraJob.getConfiguration().setInt("dfs.replication", TeraSort.getOutputReplication(teraJob));

    // Reflective equivalent of: TeraOutputFormat.setFinalSync(job, true);
    Method setFinalSync = TeraOutputFormat.class.getDeclaredMethod("setFinalSync", JobContext.class, boolean.class);
    setFinalSync.setAccessible(true);
    setFinalSync.invoke(null, teraJob, true);

    return teraJob;
}

Source File: ExampleDriver.java From RDFS with Apache License 2.0

4 votes

/**
 * Registers the example programs with a {@link ProgramDriver} under short
 * names and passes {@code argv} to {@link ProgramDriver#driver}. The JVM is
 * terminated with status 0 when that call returns normally; if anything in
 * the sequence throws, the stack trace is printed and the status is -1.
 *
 * @param argv command-line arguments handed to the driver unchanged
 */
public static void main(String[] argv) {
  final ProgramDriver driver = new ProgramDriver();
  // Pessimistic default: flipped to 0 only after driver() has returned.
  int status = -1;

  try {
    // Word counting and text search examples.
    driver.addClass("wordcount", WordCount.class,
        "A map/reduce program that counts the words in the input files.");
    driver.addClass("aggregatewordcount", AggregateWordCount.class,
        "An Aggregate based map/reduce program that counts the words in the input files.");
    driver.addClass("aggregatewordhist", AggregateWordHistogram.class,
        "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");
    driver.addClass("grep", Grep.class,
        "A map/reduce program that counts the matches of a regex in the input.");

    // Random data generation and sorting.
    driver.addClass("randomwriter", RandomWriter.class,
        "A map/reduce program that writes 10GB of random data per node.");
    driver.addClass("randomtextwriter", RandomTextWriter.class,
        "A map/reduce program that writes 10GB of random textual data per node.");
    driver.addClass("sort", Sort.class,
        "A map/reduce program that sorts the data written by the random writer.");

    // Miscellaneous examples.
    driver.addClass("pi", PiEstimator.class,
        "A map/reduce program that estimates Pi using monte-carlo method.");
    driver.addClass("pentomino", DistributedPentomino.class,
        "A map/reduce tile laying program to find solutions to pentomino problems.");
    driver.addClass("secondarysort", SecondarySort.class,
        "An example defining a secondary sort to the reduce.");
    driver.addClass("sudoku", Sudoku.class,
        "A sudoku solver.");
    driver.addClass("sleep", SleepJob.class,
        "A job that sleeps at each map and reduce task.");
    driver.addClass("join", Join.class,
        "A job that effects a join over sorted, equally partitioned datasets");
    driver.addClass("multifilewc", MultiFileWordCount.class,
        "A job that counts words from several files.");
    driver.addClass("dbcount", DBCountPageView.class,
        "An example job that count the pageview counts from a database.");

    // TeraSort suite.
    driver.addClass("teragen", TeraGen.class,
        "Generate data for the terasort");
    driver.addClass("terasort", TeraSort.class,
        "Run the terasort");
    driver.addClass("teravalidate", TeraValidate.class,
        "Checking results of terasort");

    driver.driver(argv);

    // Reaching this point means driver() did not throw.
    status = 0;
  } catch (Throwable failure) {
    failure.printStackTrace();
  }

  System.exit(status);
}

Source File: ExampleDriver.java From hadoop-gpu with Apache License 2.0

4 votes

/**
 * Registers the example programs with a {@link ProgramDriver} under short
 * names and passes {@code argv} to {@link ProgramDriver#driver}. The JVM is
 * terminated with status 0 when that call returns normally; if anything in
 * the sequence throws, the stack trace is printed and the status is -1.
 *
 * @param argv command-line arguments handed to the driver unchanged
 */
public static void main(String[] argv) {
  final ProgramDriver examples = new ProgramDriver();
  // Pessimistic default: flipped to 0 only after driver() has returned.
  int rc = -1;

  try {
    examples.addClass("wordcount", WordCount.class,
        "A map/reduce program that counts the words in the input files.");
    examples.addClass("aggregatewordcount", AggregateWordCount.class,
        "An Aggregate based map/reduce program that counts the words in the input files.");
    examples.addClass("aggregatewordhist", AggregateWordHistogram.class,
        "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");
    examples.addClass("grep", Grep.class,
        "A map/reduce program that counts the matches of a regex in the input.");
    examples.addClass("randomwriter", RandomWriter.class,
        "A map/reduce program that writes 10GB of random data per node.");
    examples.addClass("randomtextwriter", RandomTextWriter.class,
        "A map/reduce program that writes 10GB of random textual data per node.");
    examples.addClass("sort", Sort.class,
        "A map/reduce program that sorts the data written by the random writer.");
    examples.addClass("pi", PiEstimator.class,
        "A map/reduce program that estimates Pi using monte-carlo method.");
    examples.addClass("pentomino", DistributedPentomino.class,
        "A map/reduce tile laying program to find solutions to pentomino problems.");
    examples.addClass("secondarysort", SecondarySort.class,
        "An example defining a secondary sort to the reduce.");
    examples.addClass("sudoku", Sudoku.class,
        "A sudoku solver.");
    examples.addClass("sleep", SleepJob.class,
        "A job that sleeps at each map and reduce task.");
    examples.addClass("join", Join.class,
        "A job that effects a join over sorted, equally partitioned datasets");
    examples.addClass("multifilewc", MultiFileWordCount.class,
        "A job that counts words from several files.");
    examples.addClass("dbcount", DBCountPageView.class,
        "An example job that count the pageview counts from a database.");
    examples.addClass("teragen", TeraGen.class,
        "Generate data for the terasort");
    examples.addClass("terasort", TeraSort.class,
        "Run the terasort");
    examples.addClass("teravalidate", TeraValidate.class,
        "Checking results of terasort");

    examples.driver(argv);

    // Reaching this point means driver() did not throw.
    rc = 0;
  } catch (Throwable problem) {
    problem.printStackTrace();
  }

  System.exit(rc);
}

org.apache.hadoop.examples.terasort.TeraSort Java Examples