org.apache.hadoop.mapred.OutputCollector Java Examples

The following examples show how to use org.apache.hadoop.mapred.OutputCollector, the interface the old (mapred) MapReduce API uses to gather output from mappers and reducers: the framework passes an OutputCollector to each map() and reduce() call, and user code emits key/value pairs through its collect() method. Each example notes its original project and source file.
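Before the excerpts, here is a minimal sketch of how an OutputCollector-based mapper is wired into a job with the old API. MinimalJob, TokenMapper, and the argument handling are illustrative, not drawn from the projects below:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class MinimalJob {

  // The framework supplies the OutputCollector; user code only calls collect().
  public static class TokenMapper extends MapReduceBase
      implements Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output,
                    Reporter reporter) throws IOException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        output.collect(word, ONE);   // emit a <token, 1> pair
      }
    }
  }

  public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(MinimalJob.class);
    conf.setMapperClass(TokenMapper.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);   // reduce side defaults to IdentityReducer
  }
}
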
Example #1
Source File: TestDFSIO.java    From hadoop with Apache License 2.0
@Override // IOMapperBase
void collectStats(OutputCollector<Text, Text> output, 
                  String name,
                  long execTime, 
                  Long objSize) throws IOException {
  long totalSize = objSize.longValue();
  float ioRateMbSec = (float)totalSize * 1000 / (execTime * MEGA);
  LOG.info("Number of bytes processed = " + totalSize);
  LOG.info("Exec time = " + execTime);
  LOG.info("IO rate = " + ioRateMbSec);
  
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks"),
      new Text(String.valueOf(1)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"),
      new Text(String.valueOf(totalSize)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"),
      new Text(String.valueOf(execTime)));
  // rate and sqrate are scaled sums that AccumulatingReducer totals, letting
  // the driver later derive the average IO rate and its standard deviation.
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"),
      new Text(String.valueOf(ioRateMbSec*1000)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate"),
      new Text(String.valueOf(ioRateMbSec*ioRateMbSec*1000)));
}
 
Example #2
Source File: DataJoinReducerBase.java    From hadoop with Apache License 2.0
public void reduce(Object key, Iterator values,
                   OutputCollector output, Reporter reporter) throws IOException {
  if (this.reporter == null) {
    this.reporter = reporter;
  }

  SortedMap<Object, ResetableIterator> groups = regroup(key, values, reporter);
  Object[] tags = groups.keySet().toArray();
  ResetableIterator[] groupValues = new ResetableIterator[tags.length];
  for (int i = 0; i < tags.length; i++) {
    groupValues[i] = groups.get(tags[i]);
  }
  joinAndCollect(tags, groupValues, key, output, reporter);
  addLongValue("groupCount", 1);
  for (int i = 0; i < tags.length; i++) {
    groupValues[i].close();
  }
}
 
Example #3
Source File: PipesReducer.java    From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
private void startApplication(OutputCollector<K3, V3> output, Reporter reporter) throws IOException {
  if (application == null) {
    try {
      LOG.info("starting application");
      application = 
        new Application<K2, V2, K3, V3>(
            job, null, output, reporter, 
            (Class<? extends K3>) job.getOutputKeyClass(), 
            (Class<? extends V3>) job.getOutputValueClass());
      downlink = application.getDownlink();
    } catch (InterruptedException ie) {
      throw new RuntimeException("interrupted", ie);
    }
    int reduce = 0;
    downlink.runReduce(reduce, Submitter.getIsJavaRecordWriter(job));
  }
}
 
Example #4
Source File: ValueAggregatorCombiner.java    From hadoop with Apache License 2.0
/** Combines values for a given key.
 * @param key the key is expected to be a Text object, whose prefix indicates
 * the type of aggregation to apply to the values
 * @param values the values to combine
 * @param output to collect combined values
 */
public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
  String keyStr = key.toString();
  int pos = keyStr.indexOf(ValueAggregatorDescriptor.TYPE_SEPARATOR);
  String type = keyStr.substring(0, pos);
  ValueAggregator aggregator = ValueAggregatorBaseDescriptor
    .generateValueAggregator(type);
  while (values.hasNext()) {
    aggregator.addNextValue(values.next());
  }
  Iterator outputs = aggregator.getCombinerOutput().iterator();

  while (outputs.hasNext()) {
    Object v = outputs.next();
    if (v instanceof Text) {
      output.collect(key, (Text)v);
    } else {
      output.collect(key, new Text(v.toString()));
    }
  }
}
 
Example #5
Source File: HadoopArchives.java    From hadoop with Apache License 2.0
public void reduce(IntWritable key, Iterator<Text> values,
    OutputCollector<Text, Text> out,
    Reporter reporter) throws IOException {
  keyVal = key.get();
  while(values.hasNext()) {
    Text value = values.next();
    String towrite = value.toString() + "\n";
    indexStream.write(towrite.getBytes(Charsets.UTF_8));
    written++;
    if (written > numIndexes - 1) {
      // every 1000 indexes we report status
      reporter.setStatus("Creating index for archives");
      reporter.progress();
      endIndex = keyVal;
      String masterWrite = startIndex + " " + endIndex + " " + startPos 
                          +  " " + indexStream.getPos() + " \n" ;
      outStream.write(masterWrite.getBytes(Charsets.UTF_8));
      startPos = indexStream.getPos();
      startIndex = endIndex;
      written = 0;
    }
  }
}
 
Example #6
Source File: PipesReducer.java    From hadoop with Apache License 2.0
/**
 * Process all of the keys and values. Start up the application if we haven't
 * started it yet.
 */
public void reduce(K2 key, Iterator<V2> values, 
                   OutputCollector<K3, V3> output, Reporter reporter
                   ) throws IOException {
  isOk = false;
  startApplication(output, reporter);
  downlink.reduceKey(key);
  while (values.hasNext()) {
    downlink.reduceValue(values.next());
  }
  if (skipping) {
    //flush the streams on every record input if running in skip mode
    //so that we don't buffer other records surrounding a bad record.
    downlink.flush();
  }
  isOk = true;
}
 
Example #7
Source File: TestDatamerge.java    From hadoop with Apache License 2.0
public void map(IntWritable key, IntWritable val,
    OutputCollector<IntWritable, IntWritable> out, Reporter reporter)
    throws IOException {
  int k = key.get();
  final int vali = val.get();
  final String kvstr = "Unexpected tuple: " + stringify(key, val);
  if (0 == k % (srcs * srcs)) {
    assertTrue(kvstr, vali == k * 10 / srcs + srcs - 1);
  } else {
    final int i = k % srcs;
    assertTrue(kvstr, srcs * (vali - i) == 10 * (k - i));
  }
  out.collect(key, one);
}
 
Example #8
Source File: ExternalMapperReducer.java    From hadoop with Apache License 2.0
public void map(WritableComparable key, Writable value,
                OutputCollector<ExternalWritable, IntWritable> output,
                Reporter reporter)
  throws IOException {
  
  if (value instanceof Text) {
    Text text = (Text)value;
    ExternalWritable ext = new ExternalWritable(text.toString());
    output.collect(ext, new IntWritable(1));
  }
}
 
Example #9
Source File: JobControlTestUtils.java    From hadoop with Apache License 2.0
public void reduce(Text key, Iterator<Text> values,
    OutputCollector<Text, Text> output, Reporter reporter)
    throws IOException {
  Text dumbKey = new Text("");
  while (values.hasNext()) {
    Text data = values.next();
    output.collect(dumbKey, data);
  }
}
 
Example #10
Source File: OutputHandler.java    From hadoop with Apache License 2.0
/**
 * Create a handler that will handle any records output from the application.
 * @param collector the "real" collector that takes the output
 * @param reporter the reporter for reporting progress
 */
public OutputHandler(OutputCollector<K, V> collector, Reporter reporter, 
                     RecordReader<FloatWritable,NullWritable> recordReader,
                     String expectedDigest) {
  this.reporter = reporter;
  this.collector = collector;
  this.recordReader = recordReader;
  this.expectedDigest = expectedDigest;
}
 
Example #11
Source File: ChainMapper.java    From hadoop with Apache License 2.0
/**
 * Chains the <code>map(...)</code> methods of the Mappers in the chain.
 */
@SuppressWarnings({"unchecked"})
public void map(Object key, Object value, OutputCollector output,
                Reporter reporter) throws IOException {
  Mapper mapper = chain.getFirstMap();
  if (mapper != null) {
    mapper.map(key, value, chain.getMapperCollector(0, output, reporter),
               reporter);
  }
}
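
A chain like the one this map(...) drives is assembled on the JobConf with ChainMapper.addMapper (and ChainReducer on the reduce side). A minimal configuration sketch, where ChainJob, AMap, and BMap are hypothetical classes:

// Inside the driver, with org.apache.hadoop.mapred.lib.ChainMapper imported:
JobConf conf = new JobConf(ChainJob.class);

JobConf aMapConf = new JobConf(false);
ChainMapper.addMapper(conf, AMap.class,
    LongWritable.class, Text.class,   // AMap input key/value types
    Text.class, Text.class,           // AMap output key/value types
    true, aMapConf);                  // byValue = true: pass copies downstream

JobConf bMapConf = new JobConf(false);
ChainMapper.addMapper(conf, BMap.class,
    Text.class, Text.class,           // must match AMap's output types
    Text.class, IntWritable.class,
    true, bMapConf);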
 
Example #12
Source File: FieldSelectionMapReduce.java    From hadoop with Apache License 2.0
/**
 * Selects the configured fields from the input key/value pair and writes
 * the resulting key/value pair to output.
 */
public void map(K key, V val,
    OutputCollector<Text, Text> output, Reporter reporter) 
    throws IOException {
  FieldSelectionHelper helper = new FieldSelectionHelper(
    FieldSelectionHelper.emptyText, FieldSelectionHelper.emptyText);
  helper.extractOutputKeyValue(key.toString(), val.toString(),
    fieldSeparator, mapOutputKeyFieldList, mapOutputValueFieldList,
    allMapValueFieldsFrom, ignoreInputKey, true);
  output.collect(helper.getKey(), helper.getValue());
}
 
Example #13
Source File: FieldSelectionMapReduce.java    From hadoop with Apache License 2.0
public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output, Reporter reporter)
  throws IOException {
  String keyStr = key.toString() + this.fieldSeparator;
  while (values.hasNext()) {
    FieldSelectionHelper helper = new FieldSelectionHelper();
    helper.extractOutputKeyValue(keyStr, values.next().toString(),
      fieldSeparator, reduceOutputKeyFieldList,
      reduceOutputValueFieldList, allReduceValueFieldsFrom, false, false);
    output.collect(helper.getKey(), helper.getValue());
  }
}
 
Example #14
Source File: ValueAggregatorMapper.java    From hadoop with Apache License 2.0
/**
 *  The map function. It iterates through the value aggregator descriptor
 *  list to generate aggregation id/value pairs and emit them.
 */
public void map(K1 key, V1 value,
                OutputCollector<Text, Text> output, Reporter reporter) throws IOException {

  Iterator iter = this.aggregatorDescriptorList.iterator();
  while (iter.hasNext()) {
    ValueAggregatorDescriptor ad = (ValueAggregatorDescriptor) iter.next();
    Iterator<Entry<Text, Text>> ens =
      ad.generateKeyValPairs(key, value).iterator();
    while (ens.hasNext()) {
      Entry<Text, Text> en = ens.next();
      output.collect(en.getKey(), en.getValue());
    }
  }
}
 
Example #15
Source File: PipeMapRed.java    From hadoop with Apache License 2.0
void startOutputThreads(OutputCollector output, Reporter reporter) 
  throws IOException {
  inWriter_ = createInputWriter();
  outReader_ = createOutputReader();
  outThread_ = new MROutputThread(outReader_, output, reporter);
  outThread_.start();
  errThread_ = new MRErrorThread();
  errThread_.setReporter(reporter);
  errThread_.start();
}
 
Example #16
Source File: DelegatingMapper.java    From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
public void map(K1 key, V1 value, OutputCollector<K2, V2> outputCollector,
    Reporter reporter) throws IOException {

  if (mapper == null) {
    // Find the Mapper from the TaggedInputSplit.
    TaggedInputSplit inputSplit = (TaggedInputSplit) reporter.getInputSplit();
    mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit
       .getMapperClass(), conf);
  }
  mapper.map(key, value, outputCollector, reporter);
}
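
DelegatingMapper is installed by org.apache.hadoop.mapred.lib.MultipleInputs, which records a separate InputFormat and Mapper for each input path. A minimal sketch, where MultiSourceJob, the paths, AMapper, and BMapper are hypothetical:

// Inside the driver, with org.apache.hadoop.mapred.lib.MultipleInputs imported:
JobConf conf = new JobConf(MultiSourceJob.class);
MultipleInputs.addInputPath(conf, new Path("/data/a"),
    TextInputFormat.class, AMapper.class);
MultipleInputs.addInputPath(conf, new Path("/data/b"),
    SequenceFileInputFormat.class, BMapper.class);
// At runtime each split arrives as a TaggedInputSplit, and DelegatingMapper
// instantiates and invokes the mapper class recorded for that split.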
 
Example #17
Source File: LoadGeneratorMR.java    From hadoop with Apache License 2.0
@Override
public void reduce(Text key, Iterator<IntWritable> values,
    OutputCollector<Text, IntWritable> output, Reporter reporter)
    throws IOException {
  int sum = 0;
  while (values.hasNext()) {
    sum += values.next().get();
  }
  if (key.equals(OPEN_EXECTIME)){
    executionTime[OPEN] = sum;
  } else if (key.equals(NUMOPS_OPEN)){
    numOfOps[OPEN] = sum;
  } else if (key.equals(LIST_EXECTIME)){
    executionTime[LIST] = sum;
  } else if (key.equals(NUMOPS_LIST)){
    numOfOps[LIST] = sum;
  } else if (key.equals(DELETE_EXECTIME)){
    executionTime[DELETE] = sum;
  } else if (key.equals(NUMOPS_DELETE)){
    numOfOps[DELETE] = sum;
  } else if (key.equals(CREATE_EXECTIME)){
    executionTime[CREATE] = sum;
  } else if (key.equals(NUMOPS_CREATE)){
    numOfOps[CREATE] = sum;
  } else if (key.equals(WRITE_CLOSE_EXECTIME)){
    System.out.println(WRITE_CLOSE_EXECTIME + " = " + sum);
    executionTime[WRITE_CLOSE]= sum;
  } else if (key.equals(NUMOPS_WRITE_CLOSE)){
    numOfOps[WRITE_CLOSE] = sum;
  } else if (key.equals(TOTALOPS)){
    totalOps = sum;
  } else if (key.equals(ELAPSED_TIME)){
    totalTime = sum;
  }
  result.set(sum);
  output.collect(key, result);
  // System.out.println("Key = " + key + " Sum is =" + sum);
  // printResults(System.out);
}
 
Example #18
Source File: DataJoinReducerBase.java    From hadoop with Apache License 2.0
/**
 * The subclass can override this method to perform additional filtering
 * and/or other processing logic before a value is collected.
 *
 * @param key the key of the current group
 * @param aRecord the tagged record to collect
 * @param output the collector that receives the joined record
 * @param reporter facility to report progress
 * @throws IOException
 */
protected void collect(Object key, TaggedMapOutput aRecord,
                       OutputCollector output, Reporter reporter) throws IOException {
  this.collected += 1;
  addLongValue("collectedCount", 1);
  if (aRecord != null) {
    output.collect(key, aRecord.getData());
    reporter.setStatus("key: " + key.toString() + " collected: " + collected);
    addLongValue("actuallyCollectedCount", 1);
  }
}
 
Example #19
Source File: TestDatamerge.java    From hadoop with Apache License 2.0
public void map(IntWritable key, TupleWritable val,
    OutputCollector<IntWritable, IntWritable> out, Reporter reporter)
    throws IOException {
  int k = key.get();
  final String kvstr = "Unexpected tuple: " + stringify(key, val);
  assertTrue(kvstr, 0 == k % (srcs * srcs));
  for (int i = 0; i < val.size(); ++i) {
    final int vali = ((IntWritable)val.get(i)).get();
    assertTrue(kvstr, (vali - i) * srcs == 10 * k);
  }
  out.collect(key, one);
}
 
Example #20
Source File: TestDatamerge.java    From hadoop with Apache License 2.0
public void reduce(IntWritable key, Iterator<IntWritable> values,
                   OutputCollector<Text, Text> output,
                   Reporter reporter) throws IOException {
  int seen = 0;
  while (values.hasNext()) {
    seen += values.next().get();
  }
  assertTrue("Bad count for " + key.get(), verify(key.get(), seen));
}
 
Example #21
Source File: HadoopArchives.java    From hadoop with Apache License 2.0
public void map(LongWritable key, HarEntry value,
    OutputCollector<IntWritable, Text> out,
    Reporter reporter) throws IOException {
  Path relPath = new Path(value.path);
  int hash = HarFileSystem.getHarHash(relPath);
  String towrite = null;
  Path srcPath = realPath(relPath, rootPath);
  long startPos = partStream.getPos();
  FileSystem srcFs = srcPath.getFileSystem(conf);
  FileStatus srcStatus = srcFs.getFileStatus(srcPath);
  String propStr = encodeProperties(srcStatus);
  if (value.isDir()) { 
    towrite = encodeName(relPath.toString())
              + " dir " + propStr + " 0 0 ";
    StringBuffer sbuff = new StringBuffer();
    sbuff.append(towrite);
    for (String child: value.children) {
      sbuff.append(encodeName(child) + " ");
    }
    towrite = sbuff.toString();
    //reading directories is also progress
    reporter.progress();
  }
  else {
    FSDataInputStream input = srcFs.open(srcStatus.getPath());
    reporter.setStatus("Copying file " + srcStatus.getPath() + 
        " to archive.");
    copyData(srcStatus.getPath(), input, partStream, reporter);
    towrite = encodeName(relPath.toString())
              + " file " + partname + " " + startPos
              + " " + srcStatus.getLen() + " " + propStr + " ";
  }
  out.collect(new IntWritable(hash), new Text(towrite));
}
 
Example #22
Source File: PipeMapRed.java    From hadoop with Apache License 2.0
MROutputThread(OutputReader outReader, OutputCollector outCollector,
  Reporter reporter) {
  setDaemon(true);
  this.outReader = outReader;
  this.outCollector = outCollector;
  this.reporter = reporter;
}
 
Example #23
Source File: WordCount.java    From hadoop with Apache License 2.0
public void map(LongWritable key, Text value, 
                OutputCollector<Text, IntWritable> output, 
                Reporter reporter) throws IOException {
  String line = value.toString();
  StringTokenizer itr = new StringTokenizer(line);
  while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    output.collect(word, one);
  }
}
 
Example #24
Source File: MRCaching.java    From hadoop with Apache License 2.0
public void reduce(Text key, Iterator<IntWritable> values,
                   OutputCollector<Text, IntWritable> output,
                   Reporter reporter) throws IOException {
  int sum = 0;
  while (values.hasNext()) {
    sum += values.next().get();
  }
  output.collect(key, new IntWritable(sum));
}
 
Example #25
Source File: MRCaching.java    From hadoop with Apache License 2.0
public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output,
                Reporter reporter) throws IOException {
  String line = value.toString();
  StringTokenizer itr = new StringTokenizer(line);
  while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    output.collect(word, one);
  }
}
 
Example #26
Source File: HadoopMapredCompatWordCount.java    From Flink-CEPplus with Apache License 2.0
@Override
public void map(LongWritable k, Text v, OutputCollector<Text, LongWritable> out, Reporter rep)
		throws IOException {
	// normalize and split the line
	String line = v.toString();
	String[] tokens = line.toLowerCase().split("\\W+");

	// emit the pairs
	for (String token : tokens) {
		if (token.length() > 0) {
			out.collect(new Text(token), new LongWritable(1L));
		}
	}
}
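
Because this map() implements the plain mapred Mapper interface, it can also run inside a Flink DataSet program through the flink-hadoop-compatibility wrapper. A sketch under two assumptions: the enclosing class of this map() is here called Tokenizer (the excerpt omits the class declaration), and input is an existing DataSet<Tuple2<LongWritable, Text>>:

// org.apache.flink.hadoopcompatibility.mapred.HadoopMapFunction wraps a
// mapred Mapper and forwards whatever its OutputCollector receives.
DataSet<Tuple2<Text, LongWritable>> words = input.flatMap(
		new HadoopMapFunction<LongWritable, Text, Text, LongWritable>(
				new Tokenizer()));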
 
Example #27
Source File: IdentityReducer.java    From hadoop with Apache License 2.0
/** Writes all keys and values directly to output. */
public void reduce(K key, Iterator<V> values,
                   OutputCollector<K, V> output, Reporter reporter)
  throws IOException {
  while (values.hasNext()) {
    output.collect(key, values.next());
  }
}
 
Example #28
Source File: SliveReducer.java    From hadoop with Apache License 2.0
@Override // Reducer
public void reduce(Text key, Iterator<Text> values,
    OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
  OperationOutput collector = null;
  int reduceAm = 0;
  int errorAm = 0;
  logAndSetStatus(reporter, "Iterating over reduction values for key " + key);
  while (values.hasNext()) {
    Text value = values.next();
    try {
      OperationOutput val = new OperationOutput(key, value);
      if (collector == null) {
        collector = val;
      } else {
        collector = OperationOutput.merge(collector, val);
      }
      LOG.info("Combined " + val + " into/with " + collector);
      ++reduceAm;
    } catch (Exception e) {
      ++errorAm;
      logAndSetStatus(reporter, "Error iterating over reduction input "
          + value + " due to : " + StringUtils.stringifyException(e));
      if (getConfig().shouldExitOnFirstError()) {
        break;
      }
    }
  }
  logAndSetStatus(reporter, "Reduced " + reduceAm + " values with " + errorAm
      + " errors");
  if (collector != null) {
    logAndSetStatus(reporter, "Writing output " + collector.getKey() + " : "
        + collector.getOutputValue());
    output.collect(collector.getKey(), collector.getOutputValue());
  }
}
 
Example #29
Source File: TokenCountMapper.java    From hadoop with Apache License 2.0
public void map(K key, Text value,
                OutputCollector<Text, LongWritable> output,
                Reporter reporter)
  throws IOException {
  // get input text
  String text = value.toString();       // value is line of text

  // tokenize the value
  StringTokenizer st = new StringTokenizer(text);
  while (st.hasMoreTokens()) {
    // output <token,1> pairs
    output.collect(new Text(st.nextToken()), new LongWritable(1));
  }  
}
 
Example #30
Source File: FailMapper.java    From hadoop with Apache License 2.0
public void map(WritableComparable key, Writable value,
    OutputCollector<WritableComparable, Writable> out, Reporter reporter)
    throws IOException {
  // NOTE- the next line is required for the TestDebugScript test to succeed
  System.err.println("failing map");
  throw new RuntimeException("failing map");
}