org.apache.hadoop.mapred.OutputCollector Java Examples

The following examples show how to use org.apache.hadoop.mapred.OutputCollector, the interface the old (mapred) MapReduce API uses to gather output from mappers and reducers: the framework passes an OutputCollector to each map() and reduce() call, and user code emits key/value pairs through its collect() method. Each example notes its original project and source file.
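Before the excerpts, here is a minimal sketch of how an OutputCollector-based mapper is wired into a job with the old API. MinimalJob, TokenMapper, and the argument handling are illustrative, not drawn from the projects below:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class MinimalJob {

  // The framework supplies the OutputCollector; user code only calls collect().
  public static class TokenMapper extends MapReduceBase
      implements Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output,
                    Reporter reporter) throws IOException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        output.collect(word, ONE);   // emit a <token, 1> pair
      }
    }
  }

  public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(MinimalJob.class);
    conf.setMapperClass(TokenMapper.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);   // reduce side defaults to IdentityReducer
  }
}
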
Example #1
Source File: TestDFSIO.java    From hadoop with Apache License 2.0
@Override // IOMapperBase
void collectStats(OutputCollector<Text, Text> output, 
                  String name,
                  long execTime, 
                  Long objSize) throws IOException {
  long totalSize = objSize.longValue();
  float ioRateMbSec = (float)totalSize * 1000 / (execTime * MEGA);
  LOG.info("Number of bytes processed = " + totalSize);
  LOG.info("Exec time = " + execTime);
  LOG.info("IO rate = " + ioRateMbSec);
  
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks"),
      new Text(String.valueOf(1)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"),
      new Text(String.valueOf(totalSize)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"),
      new Text(String.valueOf(execTime)));
  // rate and sqrate are scaled sums that AccumulatingReducer totals, letting
  // the driver later derive the average IO rate and its standard deviation.
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"),
      new Text(String.valueOf(ioRateMbSec*1000)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate"),
      new Text(String.valueOf(ioRateMbSec*ioRateMbSec*1000)));
}
 
Example #2
Source File: DataJoinReducerBase.java    From hadoop with Apache License 2.0
public void reduce(Object key, Iterator values,
                   OutputCollector output, Reporter reporter) throws IOException {
  if (this.reporter == null) {
    this.reporter = reporter;
  }

  SortedMap<Object, ResetableIterator> groups = regroup(key, values, reporter);
  Object[] tags = groups.keySet().toArray();
  ResetableIterator[] groupValues = new ResetableIterator[tags.length];
  for (int i = 0; i < tags.length; i++) {
    groupValues[i] = groups.get(tags[i]);
  }
  joinAndCollect(tags, groupValues, key, output, reporter);
  addLongValue("groupCount", 1);
  for (int i = 0; i < tags.length; i++) {
    groupValues[i].close();
  }
}
 
Example #3
Source File: PipesReducer.java    From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
private void startApplication(OutputCollector<K3, V3> output, Reporter reporter) throws IOException {
  if (application == null) {
    try {
      LOG.info("starting application");
      application = 
        new Application<K2, V2, K3, V3>(
            job, null, output, reporter, 
            (Class<? extends K3>) job.getOutputKeyClass(), 
            (Class<? extends V3>) job.getOutputValueClass());
      downlink = application.getDownlink();
    } catch (InterruptedException ie) {
      throw new RuntimeException("interrupted", ie);
    }
    int reduce = 0;
    downlink.runReduce(reduce, Submitter.getIsJavaRecordWriter(job));
  }
}
 
Example #4
Source File: ValueAggregatorCombiner.java    From hadoop with Apache License 2.0
/** Combines values for a given key.
 * @param key the key is expected to be a Text object, whose prefix indicates
 * the type of aggregation to apply to the values
 * @param values the values to combine
 * @param output to collect combined values
 */
public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
  String keyStr = key.toString();
  int pos = keyStr.indexOf(ValueAggregatorDescriptor.TYPE_SEPARATOR);
  String type = keyStr.substring(0, pos);
  ValueAggregator aggregator = ValueAggregatorBaseDescriptor
    .generateValueAggregator(type);
  while (values.hasNext()) {
    aggregator.addNextValue(values.next());
  }
  Iterator outputs = aggregator.getCombinerOutput().iterator();

  while (outputs.hasNext()) {
    Object v = outputs.next();
    if (v instanceof Text) {
      output.collect(key, (Text)v);
    } else {
      output.collect(key, new Text(v.toString()));
    }
  }
}
 
Example #5
Source File: HadoopArchives.java    From hadoop with Apache License 2.0
public void reduce(IntWritable key, Iterator<Text> values,
    OutputCollector<Text, Text> out,
    Reporter reporter) throws IOException {
  keyVal = key.get();
  while(values.hasNext()) {
    Text value = values.next();
    String towrite = value.toString() + "\n";
    indexStream.write(towrite.getBytes(Charsets.UTF_8));
    written++;
    if (written > numIndexes - 1) {
      // every 1000 indexes we report status
      reporter.setStatus("Creating index for archives");
      reporter.progress();
      endIndex = keyVal;
      String masterWrite = startIndex + " " + endIndex + " " + startPos 
                          +  " " + indexStream.getPos() + " \n" ;
      outStream.write(masterWrite.getBytes(Charsets.UTF_8));
      startPos = indexStream.getPos();
      startIndex = endIndex;
      written = 0;
    }
  }
}
 
Example #6
Source File: PipesReducer.java    From hadoop with Apache License 2.0
/**
 * Process all of the keys and values. Start up the application if we haven't
 * started it yet.
 */
public void reduce(K2 key, Iterator<V2> values, 
                   OutputCollector<K3, V3> output, Reporter reporter
                   ) throws IOException {
  isOk = false;
  startApplication(output, reporter);
  downlink.reduceKey(key);
  while (values.hasNext()) {
    downlink.reduceValue(values.next());
  }
  if (skipping) {
    //flush the streams on every record input if running in skip mode
    //so that we don't buffer other records surrounding a bad record.
    downlink.flush();
  }
  isOk = true;
}
 
Example #7
Source File: TestDatamerge.java    From hadoop with Apache License 2.0
public void map(IntWritable key, IntWritable val,
    OutputCollector<IntWritable, IntWritable> out, Reporter reporter)
    throws IOException {
  int k = key.get();
  final int vali = val.get();
  final String kvstr = "Unexpected tuple: " + stringify(key, val);
  if (0 == k % (srcs * srcs)) {
    assertTrue(kvstr, vali == k * 10 / srcs + srcs - 1);
  } else {
    final int i = k % srcs;
    assertTrue(kvstr, srcs * (vali - i) == 10 * (k - i));
  }
  out.collect(key, one);
}
 
Example #8
Source File: ExternalMapperReducer.java    From hadoop with Apache License 2.0
public void map(WritableComparable key, Writable value,
                OutputCollector<ExternalWritable, IntWritable> output,
                Reporter reporter)
  throws IOException {
  
  if (value instanceof Text) {
    Text text = (Text)value;
    ExternalWritable ext = new ExternalWritable(text.toString());
    output.collect(ext, new IntWritable(1));
  }
}
 
Example #9
Source File: JobControlTestUtils.java    From hadoop with Apache License 2.0
public void reduce(Text key, Iterator<Text> values,
    OutputCollector<Text, Text> output, Reporter reporter)
    throws IOException {
  Text dumbKey = new Text("");
  while (values.hasNext()) {
    Text data = values.next();
    output.collect(dumbKey, data);
  }
}
 
Example #10
Source File: OutputHandler.java    From hadoop with Apache License 2.0
/**
 * Create a handler that will handle any records output from the application.
 * @param collector the "real" collector that takes the output
 * @param reporter the reporter for reporting progress
 */
public OutputHandler(OutputCollector<K, V> collector, Reporter reporter, 
                     RecordReader<FloatWritable,NullWritable> recordReader,
                     String expectedDigest) {
  this.reporter = reporter;
  this.collector = collector;
  this.recordReader = recordReader;
  this.expectedDigest = expectedDigest;
}
 
Example #11
Source File: ChainMapper.java    From hadoop with Apache License 2.0
/**
 * Chains the <code>map(...)</code> methods of the Mappers in the chain.
 */
@SuppressWarnings({"unchecked"})
public void map(Object key, Object value, OutputCollector output,
                Reporter reporter) throws IOException {
  Mapper mapper = chain.getFirstMap();
  if (mapper != null) {
    mapper.map(key, value, chain.getMapperCollector(0, output, reporter),
               reporter);
  }
}
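
A chain like the one this map(...) drives is assembled on the JobConf with ChainMapper.addMapper (and ChainReducer on the reduce side). A minimal configuration sketch, where ChainJob, AMap, and BMap are hypothetical classes:

// Inside the driver, with org.apache.hadoop.mapred.lib.ChainMapper imported:
JobConf conf = new JobConf(ChainJob.class);

JobConf aMapConf = new JobConf(false);
ChainMapper.addMapper(conf, AMap.class,
    LongWritable.class, Text.class,   // AMap input key/value types
    Text.class, Text.class,           // AMap output key/value types
    true, aMapConf);                  // byValue = true: pass copies downstream

JobConf bMapConf = new JobConf(false);
ChainMapper.addMapper(conf, BMap.class,
    Text.class, Text.class,           // must match AMap's output types
    Text.class, IntWritable.class,
    true, bMapConf);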
 
Example #12
Source File: FieldSelectionMapReduce.java    From hadoop with Apache License 2.0
/**
 * Selects the configured fields from the input key/value pair and writes
 * the resulting key/value pair to output.
 */
public void map(K key, V val,
    OutputCollector<Text, Text> output, Reporter reporter) 
    throws IOException {
  FieldSelectionHelper helper = new FieldSelectionHelper(
    FieldSelectionHelper.emptyText, FieldSelectionHelper.emptyText);
  helper.extractOutputKeyValue(key.toString(), val.toString(),
    fieldSeparator, mapOutputKeyFieldList, mapOutputValueFieldList,
    allMapValueFieldsFrom, ignoreInputKey, true);
  output.collect(helper.getKey(), helper.getValue());
}
 
Example #13
Source File: FieldSelectionMapReduce.java    From hadoop with Apache License 2.0
public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output, Reporter reporter)
  throws IOException {
  String keyStr = key.toString() + this.fieldSeparator;
  while (values.hasNext()) {
    FieldSelectionHelper helper = new FieldSelectionHelper();
    helper.extractOutputKeyValue(keyStr, values.next().toString(),
      fieldSeparator, reduceOutputKeyFieldList,
      reduceOutputValueFieldList, allReduceValueFieldsFrom, false, false);
    output.collect(helper.getKey(), helper.getValue());
  }
}
 
Example #14
Source File: ValueAggregatorMapper.java    From hadoop with Apache License 2.0
/**
 *  The map function. It iterates through the value aggregator descriptor
 *  list to generate aggregation id/value pairs and emit them.
 */
public void map(K1 key, V1 value,
                OutputCollector<Text, Text> output, Reporter reporter) throws IOException {

  Iterator iter = this.aggregatorDescriptorList.iterator();
  while (iter.hasNext()) {
    ValueAggregatorDescriptor ad = (ValueAggregatorDescriptor) iter.next();
    Iterator<Entry<Text, Text>> ens =
      ad.generateKeyValPairs(key, value).iterator();
    while (ens.hasNext()) {
      Entry<Text, Text> en = ens.next();
      output.collect(en.getKey(), en.getValue());
    }
  }
}
 
Example #15
Source File: PipeMapRed.java    From hadoop with Apache License 2.0
void startOutputThreads(OutputCollector output, Reporter reporter) 
  throws IOException {
  inWriter_ = createInputWriter();
  outReader_ = createOutputReader();
  outThread_ = new MROutputThread(outReader_, output, reporter);
  outThread_.start();
  errThread_ = new MRErrorThread();
  errThread_.setReporter(reporter);
  errThread_.start();
}
 
Example #16
Source File: DelegatingMapper.java    From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
public void map(K1 key, V1 value, OutputCollector<K2, V2> outputCollector,
    Reporter reporter) throws IOException {

  if (mapper == null) {
    // Find the Mapper from the TaggedInputSplit.
    TaggedInputSplit inputSplit = (TaggedInputSplit) reporter.getInputSplit();
    mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit
       .getMapperClass(), conf);
  }
  mapper.map(key, value, outputCollector, reporter);
}
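
DelegatingMapper is installed by org.apache.hadoop.mapred.lib.MultipleInputs, which records a separate InputFormat and Mapper for each input path. A minimal sketch, where MultiSourceJob, the paths, AMapper, and BMapper are hypothetical:

// Inside the driver, with org.apache.hadoop.mapred.lib.MultipleInputs imported:
JobConf conf = new JobConf(MultiSourceJob.class);
MultipleInputs.addInputPath(conf, new Path("/data/a"),
    TextInputFormat.class, AMapper.class);
MultipleInputs.addInputPath(conf, new Path("/data/b"),
    SequenceFileInputFormat.class, BMapper.class);
// At runtime each split arrives as a TaggedInputSplit, and DelegatingMapper
// instantiates and invokes the mapper class recorded for that split.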
 
Example #17
Source File: LoadGeneratorMR.java    From hadoop with Apache License 2.0
@Override
public void reduce(Text key, Iterator<IntWritable> values,
    OutputCollector<Text, IntWritable> output, Reporter reporter)
    throws IOException {
  int sum = 0;
  while (values.hasNext()) {
    sum += values.next().get();
  }
  if (key.equals(OPEN_EXECTIME)){
    executionTime[OPEN] = sum;
  } else if (key.equals(NUMOPS_OPEN)){
    numOfOps[OPEN] = sum;
  } else if (key.equals(LIST_EXECTIME)){
    executionTime[LIST] = sum;
  } else if (key.equals(NUMOPS_LIST)){
    numOfOps[LIST] = sum;
  } else if (key.equals(DELETE_EXECTIME)){
    executionTime[DELETE] = sum;
  } else if (key.equals(NUMOPS_DELETE)){
    numOfOps[DELETE] = sum;
  } else if (key.equals(CREATE_EXECTIME)){
    executionTime[CREATE] = sum;
  } else if (key.equals(NUMOPS_CREATE)){
    numOfOps[CREATE] = sum;
  } else if (key.equals(WRITE_CLOSE_EXECTIME)){
    System.out.println(WRITE_CLOSE_EXECTIME + " = " + sum);
    executionTime[WRITE_CLOSE]= sum;
  } else if (key.equals(NUMOPS_WRITE_CLOSE)){
    numOfOps[WRITE_CLOSE] = sum;
  } else if (key.equals(TOTALOPS)){
    totalOps = sum;
  } else if (key.equals(ELAPSED_TIME)){
    totalTime = sum;
  }
  result.set(sum);
  output.collect(key, result);
  // System.out.println("Key = " + key + " Sum is =" + sum);
  // printResults(System.out);
}
 
Example #18
Source File: DataJoinReducerBase.java    From hadoop with Apache License 2.0
/**
 * The subclass can override this method to perform additional filtering
 * and/or other processing logic before a value is collected.
 *
 * @param key the key of the current group
 * @param aRecord the tagged record to collect
 * @param output the collector that receives the joined record
 * @param reporter facility to report progress
 * @throws IOException
 */
protected void collect(Object key, TaggedMapOutput aRecord,
                       OutputCollector output, Reporter reporter) throws IOException {
  this.collected += 1;
  addLongValue("collectedCount", 1);
  if (aRecord != null) {
    output.collect(key, aRecord.getData());
    reporter.setStatus("key: " + key.toString() + " collected: " + collected);
    addLongValue("actuallyCollectedCount", 1);
  }
}
 
Example #19
Source File: TestDatamerge.java    From hadoop with Apache License 2.0
public void map(IntWritable key, TupleWritable val,
    OutputCollector<IntWritable, IntWritable> out, Reporter reporter)
    throws IOException {
  int k = key.get();
  final String kvstr = "Unexpected tuple: " + stringify(key, val);
  assertTrue(kvstr, 0 == k % (srcs * srcs));
  for (int i = 0; i < val.size(); ++i) {
    final int vali = ((IntWritable)val.get(i)).get();
    assertTrue(kvstr, (vali - i) * srcs == 10 * k);
  }
  out.collect(key, one);
}
 
Example #20
Source File: TestDatamerge.java    From hadoop with Apache License 2.0
public void reduce(IntWritable key, Iterator<IntWritable> values,
                   OutputCollector<Text, Text> output,
                   Reporter reporter) throws IOException {
  int seen = 0;
  while (values.hasNext()) {
    seen += values.next().get();
  }
  assertTrue("Bad count for " + key.get(), verify(key.get(), seen));
}
 
Example #21
Source File: HadoopArchives.java    From hadoop with Apache License 2.0
public void map(LongWritable key, HarEntry value,
    OutputCollector<IntWritable, Text> out,
    Reporter reporter) throws IOException {
  Path relPath = new Path(value.path);
  int hash = HarFileSystem.getHarHash(relPath);
  String towrite = null;
  Path srcPath = realPath(relPath, rootPath);
  long startPos = partStream.getPos();
  FileSystem srcFs = srcPath.getFileSystem(conf);
  FileStatus srcStatus = srcFs.getFileStatus(srcPath);
  String propStr = encodeProperties(srcStatus);
  if (value.isDir()) { 
    towrite = encodeName(relPath.toString())
              + " dir " + propStr + " 0 0 ";
    StringBuffer sbuff = new StringBuffer();
    sbuff.append(towrite);
    for (String child: value.children) {
      sbuff.append(encodeName(child) + " ");
    }
    towrite = sbuff.toString();
    //reading directories is also progress
    reporter.progress();
  }
  else {
    FSDataInputStream input = srcFs.open(srcStatus.getPath());
    reporter.setStatus("Copying file " + srcStatus.getPath() + 
        " to archive.");
    copyData(srcStatus.getPath(), input, partStream, reporter);
    towrite = encodeName(relPath.toString())
              + " file " + partname + " " + startPos
              + " " + srcStatus.getLen() + " " + propStr + " ";
  }
  out.collect(new IntWritable(hash), new Text(towrite));
}
 
Example #22
Source File: PipeMapRed.java    From hadoop with Apache License 2.0
MROutputThread(OutputReader outReader, OutputCollector outCollector,
  Reporter reporter) {
  setDaemon(true);
  this.outReader = outReader;
  this.outCollector = outCollector;
  this.reporter = reporter;
}
 
Example #23
Source File: WordCount.java    From hadoop with Apache License 2.0
public void map(LongWritable key, Text value, 
                OutputCollector<Text, IntWritable> output, 
                Reporter reporter) throws IOException {
  String line = value.toString();
  StringTokenizer itr = new StringTokenizer(line);
  while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    output.collect(word, one);
  }
}
 
Example #24
Source File: MRCaching.java    From hadoop with Apache License 2.0
public void reduce(Text key, Iterator<IntWritable> values,
                   OutputCollector<Text, IntWritable> output,
                   Reporter reporter) throws IOException {
  int sum = 0;
  while (values.hasNext()) {
    sum += values.next().get();
  }
  output.collect(key, new IntWritable(sum));
}
 
Example #25
Source File: MRCaching.java    From hadoop with Apache License 2.0
public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output,
                Reporter reporter) throws IOException {
  String line = value.toString();
  StringTokenizer itr = new StringTokenizer(line);
  while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    output.collect(word, one);
  }
}
 
Example #26
Source File: HadoopMapredCompatWordCount.java    From Flink-CEPplus with Apache License 2.0
@Override
public void map(LongWritable k, Text v, OutputCollector<Text, LongWritable> out, Reporter rep)
		throws IOException {
	// normalize and split the line
	String line = v.toString();
	String[] tokens = line.toLowerCase().split("\\W+");

	// emit the pairs
	for (String token : tokens) {
		if (token.length() > 0) {
			out.collect(new Text(token), new LongWritable(1L));
		}
	}
}
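
Because this map() implements the plain mapred Mapper interface, it can also run inside a Flink DataSet program through the flink-hadoop-compatibility wrapper. A sketch under two assumptions: the enclosing class of this map() is here called Tokenizer (the excerpt omits the class declaration), and input is an existing DataSet<Tuple2<LongWritable, Text>>:

// org.apache.flink.hadoopcompatibility.mapred.HadoopMapFunction wraps a
// mapred Mapper and forwards whatever its OutputCollector receives.
DataSet<Tuple2<Text, LongWritable>> words = input.flatMap(
		new HadoopMapFunction<LongWritable, Text, Text, LongWritable>(
				new Tokenizer()));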
 
Example #27
Source File: IdentityReducer.java    From hadoop with Apache License 2.0
/** Writes all keys and values directly to output. */
public void reduce(K key, Iterator<V> values,
                   OutputCollector<K, V> output, Reporter reporter)
  throws IOException {
  while (values.hasNext()) {
    output.collect(key, values.next());
  }
}
 
Example #28
Source File: SliveReducer.java    From hadoop with Apache License 2.0
@Override // Reducer
public void reduce(Text key, Iterator<Text> values,
    OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
  OperationOutput collector = null;
  int reduceAm = 0;
  int errorAm = 0;
  logAndSetStatus(reporter, "Iterating over reduction values for key " + key);
  while (values.hasNext()) {
    Text value = values.next();
    try {
      OperationOutput val = new OperationOutput(key, value);
      if (collector == null) {
        collector = val;
      } else {
        collector = OperationOutput.merge(collector, val);
      }
      LOG.info("Combined " + val + " into/with " + collector);
      ++reduceAm;
    } catch (Exception e) {
      ++errorAm;
      logAndSetStatus(reporter, "Error iterating over reduction input "
          + value + " due to : " + StringUtils.stringifyException(e));
      if (getConfig().shouldExitOnFirstError()) {
        break;
      }
    }
  }
  logAndSetStatus(reporter, "Reduced " + reduceAm + " values with " + errorAm
      + " errors");
  if (collector != null) {
    logAndSetStatus(reporter, "Writing output " + collector.getKey() + " : "
        + collector.getOutputValue());
    output.collect(collector.getKey(), collector.getOutputValue());
  }
}
 
Example #29
Source File: TokenCountMapper.java    From hadoop with Apache License 2.0
public void map(K key, Text value,
                OutputCollector<Text, LongWritable> output,
                Reporter reporter)
  throws IOException {
  // get input text
  String text = value.toString();       // value is line of text

  // tokenize the value
  StringTokenizer st = new StringTokenizer(text);
  while (st.hasMoreTokens()) {
    // output <token,1> pairs
    output.collect(new Text(st.nextToken()), new LongWritable(1));
  }  
}
 
Example #30
Source File: FailMapper.java    From hadoop with Apache License 2.0
public void map(WritableComparable key, Writable value,
    OutputCollector<WritableComparable, Writable> out, Reporter reporter)
    throws IOException {
  // NOTE- the next line is required for the TestDebugScript test to succeed
  System.err.println("failing map");
  throw new RuntimeException("failing map");
}