org.apache.hadoop.mapred.lib.HashPartitioner Java Examples

The following examples show how to use org.apache.hadoop.mapred.lib.HashPartitioner. Each snippet is taken from an open-source project; the source file and originating project are noted above each example.
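Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the two ways HashPartitioner typically appears in the old org.apache.hadoop.mapred API: declared as a job's partitioner class, or instantiated directly to compute which reduce partition a key hashes to.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.HashPartitioner;

public class HashPartitionerSketch {
  public static void main(String[] args) {
    // HashPartitioner is the default partitioner, but it can be set explicitly.
    JobConf job = new JobConf();
    job.setPartitionerClass(HashPartitioner.class);

    // Used directly, getPartition hashes the key into [0, numReduceTasks).
    HashPartitioner<Text, IntWritable> partitioner =
        new HashPartitioner<Text, IntWritable>();
    int partition = partitioner.getPartition(
        new Text("http://example.com/"), new IntWritable(1), 4);
    System.out.println("key maps to partition " + partition);
  }
}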
Example #1
Source File: SortValidator.java    From hadoop-gpu with Apache License 2.0
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
      // part file is of the form part-xxxxx
      partition = Integer.valueOf(inputFile.substring(
        inputFile.lastIndexOf("part") + 5)).intValue();
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
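The partitioner, partition, and noSortReducers fields configured above are what the sort validation later relies on to confirm that every key read from a given output part file hashes back to that same partition. A hedged sketch of that check, built only from the fields in this snippet (the enclosing map method is assumed, not shown in the source above):

// Hypothetical check: a key read from output part file N should hash back to
// partition N under the same HashPartitioner the sort job used.
int computed = partitioner.getPartition(key, value, noSortReducers);
if (computed != partition) {
  throw new RuntimeException("Partition mismatch for key " + key);
}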
 
Example #2
Source File: SortValidator.java    From big-c with Apache License 2.0
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get(JobContext.MAP_INPUT_FILE));
      String inputFile = inputURI.getPath();
      // part file is of the form part-r-xxxxx
      partition = Integer.valueOf(inputFile.substring(
        inputFile.lastIndexOf("part") + 7)).intValue();
      noSortReducers = job.getInt(SORT_REDUCES, -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
 
Example #3
Source File: NodeReader.java    From nutch-htmlunit with Apache License 2.0
/**
 * Prints the content of the Node represented by the url to System.out.
 * 
 * @param webGraphDb The webgraph from which to get the node.
 * @param url The url of the node.
 * 
 * @throws IOException If an error occurs while getting the node.
 */
public void dumpUrl(Path webGraphDb, String url)
  throws IOException {

  fs = FileSystem.get(getConf());
  nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
    WebGraph.NODE_DIR), getConf());

  // open the readers, get the node, print out the info, and close the readers
  Text key = new Text(url);
  Node node = new Node();
  MapFileOutputFormat.getEntry(nodeReaders,
    new HashPartitioner<Text, Node>(), key, node);
  System.out.println(url + ":");
  System.out.println("  inlink score: " + node.getInlinkScore());
  System.out.println("  outlink score: " + node.getOutlinkScore());
  System.out.println("  num inlinks: " + node.getNumInlinks());
  System.out.println("  num outlinks: " + node.getNumOutlinks());
  FSUtils.closeReaders(nodeReaders);
}
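MapFileOutputFormat.getEntry selects which part-file reader to consult by hashing the key with the supplied HashPartitioner, mirroring how the writing job distributed keys. A rough sketch of the equivalent manual lookup (an assumption for illustration, relying on nodeReaders being ordered by partition number):

// Sketch of what getEntry does: hash the key with the same partitioner the
// writing job used, then read from that partition's MapFile.
int part = new HashPartitioner<Text, Node>()
    .getPartition(key, node, nodeReaders.length);
Writable found = nodeReaders[part].get(key, node);  // null if the url is absent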
 
Example #4
Source File: LoopReader.java    From anthelion with Apache License 2.0
/**
 * Prints the loopset for a single url. The loopset information will show any
 * outlink url that eventually forms a link cycle.
 * 
 * @param webGraphDb The WebGraph to check for loops
 * @param url The url to check.
 * 
 * @throws IOException If an error occurs while printing loopset information.
 */
public void dumpUrl(Path webGraphDb, String url)
  throws IOException {

  // open the readers
  fs = FileSystem.get(getConf());
  loopReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
    Loops.LOOPS_DIR), getConf());

  // get the loopset for a given url, if any
  Text key = new Text(url);
  LoopSet loop = new LoopSet();
  MapFileOutputFormat.getEntry(loopReaders,
    new HashPartitioner<Text, LoopSet>(), key, loop);

  // print out each loop url in the set
  System.out.println(url + ":");
  for (String loopUrl : loop.getLoopSet()) {
    System.out.println("  " + loopUrl);
  }

  // close the readers
  FSUtils.closeReaders(loopReaders);
}
 
Example #5
Source File: NodeReader.java    From anthelion with Apache License 2.0
/**
 * Prints the content of the Node represented by the url to System.out.
 * 
 * @param webGraphDb The webgraph from which to get the node.
 * @param url The url of the node.
 * 
 * @throws IOException If an error occurs while getting the node.
 */
public void dumpUrl(Path webGraphDb, String url)
  throws IOException {

  fs = FileSystem.get(getConf());
  nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
    WebGraph.NODE_DIR), getConf());

  // open the readers, get the node, print out the info, and close the readers
  Text key = new Text(url);
  Node node = new Node();
  MapFileOutputFormat.getEntry(nodeReaders,
    new HashPartitioner<Text, Node>(), key, node);
  System.out.println(url + ":");
  System.out.println("  inlink score: " + node.getInlinkScore());
  System.out.println("  outlink score: " + node.getOutlinkScore());
  System.out.println("  num inlinks: " + node.getNumInlinks());
  System.out.println("  num outlinks: " + node.getNumOutlinks());
  FSUtils.closeReaders(nodeReaders);
}
 
Example #6
Source File: SortValidator.java    From hadoop with Apache License 2.0
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get(JobContext.MAP_INPUT_FILE));
      String inputFile = inputURI.getPath();
      // part file is of the form part-r-xxxxx
      partition = Integer.valueOf(inputFile.substring(
        inputFile.lastIndexOf("part") + 7)).intValue();
      noSortReducers = job.getInt(SORT_REDUCES, -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
 
Example #7
Source File: LoopReader.java    From nutch-htmlunit with Apache License 2.0
/**
 * Prints the loopset for a single url. The loopset information will show any
 * outlink url that eventually forms a link cycle.
 * 
 * @param webGraphDb The WebGraph to check for loops
 * @param url The url to check.
 * 
 * @throws IOException If an error occurs while printing loopset information.
 */
public void dumpUrl(Path webGraphDb, String url)
  throws IOException {

  // open the readers
  fs = FileSystem.get(getConf());
  loopReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
    Loops.LOOPS_DIR), getConf());

  // get the loopset for a given url, if any
  Text key = new Text(url);
  LoopSet loop = new LoopSet();
  MapFileOutputFormat.getEntry(loopReaders,
    new HashPartitioner<Text, LoopSet>(), key, loop);

  // print out each loop url in the set
  System.out.println(url + ":");
  for (String loopUrl : loop.getLoopSet()) {
    System.out.println("  " + loopUrl);
  }

  // close the readers
  FSUtils.closeReaders(loopReaders);
}
 
Example #8
Source File: SortValidator.java    From RDFS with Apache License 2.0
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
      // part file is of the form part-xxxxx
      partition = Integer.valueOf(inputFile.substring(
        inputFile.lastIndexOf("part") + 5)).intValue();
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
 
Example #9
Source File: LinkDumper.java    From nutch-htmlunit with Apache License 2.0
public static void main(String[] args)
  throws Exception {
  
  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }

  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
    webGraphDb, DUMP_DIR), conf);

  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
    new HashPartitioner<Text, LinkNodes>(), key, nodes);

  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println("  " + node.getUrl() + " - "
      + node.getNode().toString());
  }

  // close the readers
  FSUtils.closeReaders(readers);
}
 
Example #10
Source File: CrawlDbReader.java    From anthelion with Apache License 2.0
public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException {
  Text key = new Text(url);
  CrawlDatum val = new CrawlDatum();
  // open MapFile readers over the crawldb, then look up the datum for this url
  openReaders(crawlDb, config);
  CrawlDatum res = (CrawlDatum) MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, CrawlDatum>(), key, val);
  return res;
}
 
Example #11
Source File: LinkDumper.java    From anthelion with Apache License 2.0
public static void main(String[] args)
  throws Exception {
  
  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }

  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
    webGraphDb, DUMP_DIR), conf);

  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
    new HashPartitioner<Text, LinkNodes>(), key, nodes);

  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println("  " + node.getUrl() + " - "
      + node.getNode().toString());
  }

  // close the readers
  FSUtils.closeReaders(readers);
}
 
Example #12
Source File: CrawlDbReader.java    From nutch-htmlunit with Apache License 2.0
public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException {
  Text key = new Text(url);
  CrawlDatum val = new CrawlDatum();
  // open MapFile readers over the crawldb, then look up the datum for this url
  openReaders(crawlDb, config);
  CrawlDatum res = (CrawlDatum) MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, CrawlDatum>(), key, val);
  return res;
}
 
Example #13
Source File: Submitter.java    From RDFS with Apache License 2.0
/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass("hadoop.pipes.partitioner", 
                       HashPartitioner.class,
                       Partitioner.class);
}
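The "hadoop.pipes.partitioner" key read here is how Pipes remembers the user's original partitioner after substituting its own. A hedged sketch of the setter side (shown as an assumption mirroring the getter, not verbatim source):

// Hypothetical counterpart: record the user's partitioner under the pipes key
// so getJavaPartitioner can recover it later.
static void setJavaPartitioner(JobConf conf, Class<? extends Partitioner> cls) {
  conf.setClass("hadoop.pipes.partitioner", cls, Partitioner.class);
}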
 
Example #14
Source File: Submitter.java    From hadoop-gpu with Apache License 2.0
/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass("hadoop.pipes.partitioner", 
                       HashPartitioner.class,
                       Partitioner.class);
}
 
Example #15
Source File: Submitter.java    From big-c with Apache License 2.0
/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass(Submitter.PARTITIONER, 
                       HashPartitioner.class,
                       Partitioner.class);
}
 
Example #16
Source File: Submitter.java    From hadoop with Apache License 2.0
/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass(Submitter.PARTITIONER, 
                       HashPartitioner.class,
                       Partitioner.class);
}
 
Example #17
Source File: JobConf.java    From RDFS with Apache License 2.0
/**
 * Get the {@link Partitioner} used to partition {@link Mapper}-outputs
 * to be sent to the {@link Reducer}s.
 *
 * @return the {@link Partitioner} used to partition map-outputs.
 */
public Class<? extends Partitioner> getPartitionerClass() {
  return getClass("mapred.partitioner.class",
                  HashPartitioner.class, Partitioner.class);
}
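For completeness, a short usage sketch of the getter and its matching setter, both of which work against the mapred.partitioner.class property (the throwaway JobConf here is an assumption for illustration):

JobConf job = new JobConf();
// Nothing configured yet, so the HashPartitioner default comes back.
Class<? extends Partitioner> cls = job.getPartitionerClass();
System.out.println(cls.getName());  // org.apache.hadoop.mapred.lib.HashPartitioner
// setPartitionerClass writes the same mapred.partitioner.class property.
job.setPartitionerClass(HashPartitioner.class);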
 
Example #18
Source File: JobConf.java    From big-c with Apache License 2.0
/**
 * Get the {@link Partitioner} used to partition {@link Mapper}-outputs 
 * to be sent to the {@link Reducer}s.
 * 
 * @return the {@link Partitioner} used to partition map-outputs.
 */
public Class<? extends Partitioner> getPartitionerClass() {
  return getClass("mapred.partitioner.class",
                  HashPartitioner.class, Partitioner.class);
}
 
Example #19
Source File: JobConf.java    From hadoop with Apache License 2.0
/**
 * Get the {@link Partitioner} used to partition {@link Mapper}-outputs 
 * to be sent to the {@link Reducer}s.
 * 
 * @return the {@link Partitioner} used to partition map-outputs.
 */
public Class<? extends Partitioner> getPartitionerClass() {
  return getClass("mapred.partitioner.class",
                  HashPartitioner.class, Partitioner.class);
}
 
Example #20
Source File: JobConf.java    From hadoop-gpu with Apache License 2.0
/**
 * Get the {@link Partitioner} used to partition {@link Mapper}-outputs 
 * to be sent to the {@link Reducer}s.
 * 
 * @return the {@link Partitioner} used to partition map-outputs.
 */
public Class<? extends Partitioner> getPartitionerClass() {
  return getClass("mapred.partitioner.class",
                  HashPartitioner.class, Partitioner.class);
}