Java Code Examples for org.apache.hadoop.io.WritableUtils#clone()

The following examples show how to use org.apache.hadoop.io.WritableUtils#clone(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: NodeDumper.java    From anthelion with Apache License 2.0 6 votes vote down vote up
/**
 * Swaps key and value: every url is emitted as the output key, paired with
 * the numeric sort value derived from the incoming key. At most topn urls
 * are emitted per call.
 */
public void reduce(FloatWritable key, Iterator<Text> values,
  OutputCollector<Text, FloatWritable> output, Reporter reporter)
  throws IOException {

  // the incoming key is negated back to recover the original value; a zero
  // key is emitted as a plain 0 instead of being negated
  final float sortValue = key.get();
  final FloatWritable restored;
  if (sortValue == 0) {
    restored = new FloatWritable(0f);
  } else {
    restored = new FloatWritable(-sortValue);
  }

  // emit url -> value pairs until the values run out or topn is reached
  for (long emitted = 0; values.hasNext() && (emitted < topn); emitted++) {
    Text urlCopy = WritableUtils.clone(values.next(), conf);
    output.collect(urlCopy, restored);
  }
}
 
Example 2
Source File: ConfigurableHDFSFileSource.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the next key/value pair from the current reader position.
 *
 * <p>Writable keys and values are cloned because the reader reuses them
 * between calls to RecordReader#nextKeyValue. A BinaryComparable value whose
 * length reaches the "maxRowSize" setting (default 10 * 1024 * 1024) is
 * rejected with a FileParameterException.
 *
 * @return the pair made of the (possibly cloned) key and value
 */
@SuppressWarnings("unchecked")
protected KV<K, V> nextPair() throws IOException, InterruptedException {
    final K readerKey = currentReader.getCurrentKey();
    final V readerValue = currentReader.getCurrentValue();

    // copy Writable instances so the pair does not alias objects the reader will overwrite
    final K pairKey = (readerKey instanceof Writable)
            ? (K) WritableUtils.clone((Writable) readerKey, conf)
            : readerKey;
    final V pairValue = (readerValue instanceof Writable)
            ? (V) WritableUtils.clone((Writable) readerValue, conf)
            : readerValue;

    if (pairValue instanceof BinaryComparable) {
        // enforce the configured row size limit
        final int rowLength = ((BinaryComparable) pairValue).getLength();
        final int rowLimit = conf.getInt("maxRowSize", 10 * 1024 * 1024);
        if (rowLength >= rowLimit) {
            throw new FileParameterException("Row size exceeded maximum allowed size (" + rowLimit + ")");
        }
    }
    return KV.of(pairKey, pairValue);
}
 
Example 3
Source File: NodeDumper.java    From nutch-htmlunit with Apache License 2.0 6 votes vote down vote up
/**
 * Flips the incoming pairs: each url becomes the output key and the numeric
 * sort value becomes the output value. No more than topn urls are emitted
 * per call.
 */
public void reduce(FloatWritable key, Iterator<Text> values,
  OutputCollector<Text, FloatWritable> output, Reporter reporter)
  throws IOException {

  // negate the key to get the original value back, except for zero, which
  // is written as a plain 0
  final float keyValue = key.get();
  final float original;
  if (keyValue == 0) {
    original = 0;
  } else {
    original = -keyValue;
  }
  final FloatWritable number = new FloatWritable(original);

  // write one record per url, stopping at topn or when the values are exhausted
  long written = 0;
  while (values.hasNext() && (written < topn)) {
    output.collect(WritableUtils.clone(values.next(), conf), number);
    written++;
  }
}
 
Example 4
Source File: ArrayListBackedIterator.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the next stored element into {@code val} and keeps a copy of it in
 * {@code hold}.
 *
 * @param val object that receives the contents of the next element
 * @return true if an element was copied, false if no elements remain
 */
public boolean next(X val) throws IOException {
  if (!iter.hasNext()) {
    return false;
  }
  ReflectionUtils.copy(conf, iter.next(), val);
  if (hold != null) {
    // reuse the existing hold object
    ReflectionUtils.copy(conf, val, hold);
  } else {
    // first element: hold is created as a clone of val (note the null configuration)
    hold = WritableUtils.clone(val, null);
  }
  return true;
}
 
Example 5
Source File: ArrayListBackedIterator.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Advances the iterator: the next stored element is copied into {@code val},
 * and {@code hold} is updated to mirror it.
 *
 * @param val target object filled from the next element
 * @return false when the iterator is exhausted, true otherwise
 */
public boolean next(X val) throws IOException {
  final boolean available = iter.hasNext();
  if (available) {
    ReflectionUtils.copy(conf, iter.next(), val);
    if (hold == null) {
      // nothing held yet, so create hold by cloning val (a null configuration is passed)
      hold = WritableUtils.clone(val, null);
    } else {
      ReflectionUtils.copy(conf, val, hold);
    }
  }
  return available;
}
 
Example 6
Source File: LinkRank.java    From anthelion with Apache License 2.0 5 votes vote down vote up
/**
 * Emits a copy of the node, with its inlink score set to initialScore, under
 * a new Text key holding the same url.
 */
public void map(Text key, Node node, OutputCollector<Text, Node> output,
  Reporter reporter)
  throws IOException {

  final Text outKey = new Text(key.toString());

  // the score is set on a clone, not on the node that was passed in
  final Node seeded = (Node)WritableUtils.clone(node, conf);
  seeded.setInlinkScore(initialScore);

  output.collect(outKey, seeded);
}
 
Example 7
Source File: ArrayListBackedIterator.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the next stored element into {@code val} and keeps a copy of it in
 * {@code hold}.
 *
 * @param val object that receives the contents of the next element
 * @return true if an element was copied, false if no elements remain
 */
public boolean next(X val) throws IOException {
  if (!iter.hasNext()) {
    return false;
  }
  WritableUtils.cloneInto(val, iter.next());
  if (hold != null) {
    // reuse the existing hold object
    WritableUtils.cloneInto(hold, val);
  } else {
    // first element: hold is created as a clone of val (note the null configuration)
    hold = WritableUtils.clone(val, null);
  }
  return true;
}
 
Example 8
Source File: LinkRank.java    From nutch-htmlunit with Apache License 2.0 5 votes vote down vote up
/**
 * Writes out a clone of the node whose inlink score has been set to
 * initialScore, keyed by a fresh Text built from the incoming url.
 */
public void map(Text key, Node node, OutputCollector<Text, Node> output,
  Reporter reporter)
  throws IOException {

  final Text urlKey = new Text(key.toString());

  // only the clone is modified; the node passed in keeps its score
  final Node initialized = WritableUtils.clone(node, conf);
  initialized.setInlinkScore(initialScore);

  output.collect(urlKey, initialized);
}
 
Example 9
Source File: ArrayListBackedIterator.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
 * Advances the iterator: the next stored element is copied into {@code val},
 * and {@code hold} is updated to mirror it.
 *
 * @param val target object filled from the next element
 * @return false when the iterator is exhausted, true otherwise
 */
public boolean next(X val) throws IOException {
  final boolean available = iter.hasNext();
  if (available) {
    WritableUtils.cloneInto(val, iter.next());
    if (hold == null) {
      // nothing held yet, so create hold by cloning val (a null configuration is passed)
      hold = WritableUtils.clone(val, null);
    } else {
      WritableUtils.cloneInto(hold, val);
    }
  }
  return available;
}
 
Example 10
Source File: TaggedMapOutput.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a copy of this object made with WritableUtils.clone.
 *
 * @param job configuration passed to the clone call
 * @return the copy, cast to TaggedMapOutput
 */
public TaggedMapOutput clone(JobConf job) {
  final Object copy = WritableUtils.clone(this, job);
  return (TaggedMapOutput) copy;
}
 
Example 11
Source File: TaggedMapOutput.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Clones this object by delegating to WritableUtils.clone.
 *
 * @param job configuration forwarded to WritableUtils.clone
 * @return the clone as a TaggedMapOutput
 */
public TaggedMapOutput clone(JobConf job) {
  final Object duplicate = WritableUtils.clone(this, job);
  return (TaggedMapOutput) duplicate;
}
 
Example 12
Source File: LinkRank.java    From anthelion with Apache License 2.0 4 votes vote down vote up
/**
 * Performs a single iteration of link analysis. The resulting scores are
 * stored in a temporary NodeDb which replaces the NodeDb of the WebGraph.
 *
 * <p>A value holding a Node supplies the node to emit. A value holding a
 * LinkDatum adds its score to the total inlink score, unless page or domain
 * limiting is enabled and that page or domain has already been counted. If
 * no Node value is seen, a warning is logged and nothing is emitted, since
 * there is no node to clone.
 *
 * @param key the url being scored
 * @param values Node and LinkDatum objects wrapped in ObjectWritable
 * @param output collector that receives the url with its rescored node
 * @param reporter not used by this method
 * @throws IOException declared; may be raised while cloning the node or
 *         collecting the output
 */
public void reduce(Text key, Iterator<ObjectWritable> values,
  OutputCollector<Text, Node> output, Reporter reporter)
  throws IOException {

  String url = key.toString();
  Set<String> domains = new HashSet<String>();
  Set<String> pages = new HashSet<String>();
  Node node = null;

  // checked once so debug messages are only assembled when they are wanted
  final boolean debug = LOG.isDebugEnabled();

  // a page with zero inlinks has a score of rankOne
  int numInlinks = 0;
  float totalInlinkScore = rankOne;

  while (values.hasNext()) {

    ObjectWritable next = values.next();
    Object value = next.get();
    if (value instanceof Node) {
      node = (Node)value;
    }
    else if (value instanceof LinkDatum) {

      LinkDatum linkDatum = (LinkDatum)value;
      float scoreFromInlink = linkDatum.getScore();
      String inlinkUrl = linkDatum.getUrl();
      String inLinkDomain = URLUtil.getDomainName(inlinkUrl);
      String inLinkPage = URLUtil.getPage(inlinkUrl);

      // limit counting duplicate inlinks by pages or domains
      if ((limitPages && pages.contains(inLinkPage))
        || (limitDomains && domains.contains(inLinkDomain))) {
        if (debug) {
          LOG.debug(url + ": ignoring " + scoreFromInlink + " from "
            + inlinkUrl + ", duplicate page or domain");
        }
        continue;
      }

      // aggregate total inlink score
      numInlinks++;
      totalInlinkScore += scoreFromInlink;
      domains.add(inLinkDomain);
      pages.add(inLinkPage);
      if (debug) {
        LOG.debug(url + ": adding " + scoreFromInlink + " from " + inlinkUrl
          + ", total: " + totalInlinkScore);
      }
    }
  }

  // without a Node value there is nothing to clone; cloning null would fail
  // with a NullPointerException, so report it and emit nothing
  if (node == null) {
    LOG.warn(url + ": no node among the values, nothing to score");
    return;
  }

  // calculate linkRank score formula
  float linkRankScore = (1 - this.dampingFactor)
    + (this.dampingFactor * totalInlinkScore);

  if (debug) {
    LOG.debug(url + ": score: " + linkRankScore + " num inlinks: "
      + numInlinks + " iteration: " + itNum);
  }

  // store the score in a temporary NodeDb
  Node outNode = (Node)WritableUtils.clone(node, conf);
  outNode.setInlinkScore(linkRankScore);
  output.collect(key, outNode);
}
 
Example 13
Source File: OutputValue.java    From hiped2 with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a copy of this object made with WritableUtils.clone.
 *
 * @param job configuration passed to the clone call
 * @return the copy of this OutputValue
 */
public OutputValue clone(JobConf job) {
  final OutputValue copy = WritableUtils.clone(this, job);
  return copy;
}
 
Example 14
Source File: TaggedMapOutput.java    From RDFS with Apache License 2.0 4 votes vote down vote up
/**
 * Produces a copy of this object via WritableUtils.clone.
 *
 * @param job configuration given to WritableUtils.clone
 * @return the copied object, cast to TaggedMapOutput
 */
public TaggedMapOutput clone(JobConf job) {
  final Object cloned = WritableUtils.clone(this, job);
  return (TaggedMapOutput) cloned;
}
 
Example 15
Source File: LinkRank.java    From nutch-htmlunit with Apache License 2.0 4 votes vote down vote up
/**
 * Performs a single iteration of link analysis. The resulting scores are
 * stored in a temporary NodeDb which replaces the NodeDb of the WebGraph.
 *
 * <p>A value holding a Node supplies the node to emit. A value holding a
 * LinkDatum adds its score to the total inlink score, unless page or domain
 * limiting is enabled and that page or domain has already been counted. If
 * no Node value is seen, a warning is logged and nothing is emitted, since
 * there is no node to clone.
 *
 * @param key the url being scored
 * @param values Node and LinkDatum objects wrapped in ObjectWritable
 * @param output collector that receives the url with its rescored node
 * @param reporter not used by this method
 * @throws IOException declared; may be raised while cloning the node or
 *         collecting the output
 */
public void reduce(Text key, Iterator<ObjectWritable> values,
  OutputCollector<Text, Node> output, Reporter reporter)
  throws IOException {

  String url = key.toString();
  Set<String> domains = new HashSet<String>();
  Set<String> pages = new HashSet<String>();
  Node node = null;

  // checked once so debug messages are only assembled when they are wanted
  final boolean debug = LOG.isDebugEnabled();

  // a page with zero inlinks has a score of rankOne
  int numInlinks = 0;
  float totalInlinkScore = rankOne;

  while (values.hasNext()) {

    ObjectWritable next = values.next();
    Object value = next.get();
    if (value instanceof Node) {
      node = (Node)value;
    }
    else if (value instanceof LinkDatum) {

      LinkDatum linkDatum = (LinkDatum)value;
      float scoreFromInlink = linkDatum.getScore();
      String inlinkUrl = linkDatum.getUrl();
      String inLinkDomain = URLUtil.getDomainName(inlinkUrl);
      String inLinkPage = URLUtil.getPage(inlinkUrl);

      // limit counting duplicate inlinks by pages or domains
      if ((limitPages && pages.contains(inLinkPage))
        || (limitDomains && domains.contains(inLinkDomain))) {
        if (debug) {
          LOG.debug(url + ": ignoring " + scoreFromInlink + " from "
            + inlinkUrl + ", duplicate page or domain");
        }
        continue;
      }

      // aggregate total inlink score
      numInlinks++;
      totalInlinkScore += scoreFromInlink;
      domains.add(inLinkDomain);
      pages.add(inLinkPage);
      if (debug) {
        LOG.debug(url + ": adding " + scoreFromInlink + " from " + inlinkUrl
          + ", total: " + totalInlinkScore);
      }
    }
  }

  // without a Node value there is nothing to clone; cloning null would fail
  // with a NullPointerException, so report it and emit nothing
  if (node == null) {
    LOG.warn(url + ": no node among the values, nothing to score");
    return;
  }

  // calculate linkRank score formula
  float linkRankScore = (1 - this.dampingFactor)
    + (this.dampingFactor * totalInlinkScore);

  if (debug) {
    LOG.debug(url + ": score: " + linkRankScore + " num inlinks: "
      + numInlinks + " iteration: " + itNum);
  }

  // store the score in a temporary NodeDb
  Node outNode = WritableUtils.clone(node, conf);
  outNode.setInlinkScore(linkRankScore);
  output.collect(key, outNode);
}
 
Example 16
Source File: TaggedMapOutput.java    From hadoop-gpu with Apache License 2.0 4 votes vote down vote up
/**
 * Copies this object using WritableUtils.clone.
 *
 * @param job configuration supplied to the clone call
 * @return the resulting copy as a TaggedMapOutput
 */
public TaggedMapOutput clone(JobConf job) {
  final Object result = WritableUtils.clone(this, job);
  return (TaggedMapOutput) result;
}