Java Code Examples for org.apache.hadoop.mapred.OutputCollector#collect()

The following examples show how to use org.apache.hadoop.mapred.OutputCollector#collect(). Each example is taken from an open-source project; the source file and project it comes from are noted above it.
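For background: in the classic org.apache.hadoop.mapred API, the framework hands an OutputCollector to every map() and reduce() invocation, and each call to collect(key, value) emits one output key/value pair (it is the old-API counterpart of context.write() in org.apache.hadoop.mapreduce). The interface itself is minimal; its declaration is essentially the following sketch:

public interface OutputCollector<K, V> {

  /** Adds a key/value pair to the output. */
  void collect(K key, V value) throws IOException;
}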
Example 1
Source File: DataJoinMapperBase.java    From hadoop with Apache License 2.0
public void map(Object key, Object value,
                OutputCollector output, Reporter reporter) throws IOException {
  if (this.reporter == null) {
    this.reporter = reporter;
  }
  addLongValue("totalCount", 1);
  TaggedMapOutput aRecord = generateTaggedMapOutput(value);
  if (aRecord == null) {
    addLongValue("discardedCount", 1);
    return;
  }
  Text groupKey = generateGroupKey(aRecord);
  if (groupKey == null) {
    addLongValue("nullGroupKeyCount", 1);
    return;
  }
  output.collect(groupKey, aRecord);
  addLongValue("collectedCount", 1);
}
 
Example 2
Source File: HadoopReduceCombineFunctionITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
public void reduce(IntWritable k, Iterator<Text> vs, OutputCollector<IntWritable, IntWritable> out, Reporter r)
		throws IOException {
	int commentCnt = 0;
	while (vs.hasNext()) {
		String v = vs.next().toString();
		if (v.startsWith(this.countPrefix)) {
			commentCnt++;
		}
	}
	out.collect(k, new IntWritable(commentCnt));
}
 
Example 3
Source File: WordCount.java    From hadoop with Apache License 2.0
public void reduce(Text key, Iterator<IntWritable> values,
                   OutputCollector<Text, IntWritable> output, 
                   Reporter reporter) throws IOException {
  int sum = 0;
  while (values.hasNext()) {
    sum += values.next().get();
  }
  output.collect(key, new IntWritable(sum));
}
 
Example 4
Source File: ValueCountReduce.java    From hadoop with Apache License 2.0
public void reduce(Object key, Iterator values, OutputCollector output, Reporter reporter) throws IOException {
  // Count the values for this key and emit the count as Text.
  int count = 0;
  while (values.hasNext()) {
    count += 1;
    values.next();
  }
  output.collect(key, new Text("" + count));
}
 
Example 5
Source File: TestDatamerge.java    From hadoop with Apache License 2.0
public void map(IntWritable key, TupleWritable val,
    OutputCollector<IntWritable, IntWritable> out, Reporter reporter)
    throws IOException {
  int k = key.get();
  final String kvstr = "Unexpected tuple: " + stringify(key, val);
  assertTrue(kvstr, 0 == k % (srcs * srcs));
  for (int i = 0; i < val.size(); ++i) {
    final int vali = ((IntWritable)val.get(i)).get();
    assertTrue(kvstr, (vali - i) * srcs == 10 * k);
  }
  out.collect(key, one);
}
 
Example 6
Source File: HadoopReduceFunctionITCase.java    From flink with Apache License 2.0
@Override
public void reduce(IntWritable k, Iterator<Text> vs, OutputCollector<IntWritable, IntWritable> out, Reporter r)
		throws IOException {
	int commentCnt = 0;
	while (vs.hasNext()) {
		String v = vs.next().toString();
		if (v.startsWith("Comment")) {
			commentCnt++;
		}
	}
	out.collect(new IntWritable(42), new IntWritable(commentCnt));
}
 
Example 7
Source File: WordCount.java    From hadoop with Apache License 2.0
public void map(LongWritable key, Text value, 
                OutputCollector<Text, IntWritable> output, 
                Reporter reporter) throws IOException {
  String line = value.toString();
  StringTokenizer itr = new StringTokenizer(line);
  while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    output.collect(word, one);
  }
}
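The map() above pairs with the reduce() in Example 3, and the framework supplies the OutputCollector, so the types passed to collect() must match the job's configured output classes. A minimal, hypothetical driver for this WordCount pair is sketched below; the MapClass and Reduce class names are assumed rather than taken from the source file, and the JobConf, FileInputFormat, FileOutputFormat, and JobClient classes are the org.apache.hadoop.mapred ones.

JobConf conf = new JobConf(WordCount.class);
conf.setJobName("wordcount");

// These must match the <Text, IntWritable> pairs handed to collect().
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);

conf.setMapperClass(MapClass.class);   // the map() shown above
conf.setCombinerClass(Reduce.class);
conf.setReducerClass(Reduce.class);    // the reduce() from Example 3

FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);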
 
Example 8
Source File: PersonVersion.java    From blog with MIT License
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {

    KPI kpi = KPI.filterPVs(value.toString());
    if (kpi.getValid() == 0 && kpi.getRemote_addr() != null) {
        word.set(kpi.getRemote_addr());
        output.collect(word, one);
    }
}
 
Example 9
Source File: HadoopMapredCompatWordCount.java    From Flink-CEPplus with Apache License 2.0
@Override
public void map(LongWritable k, Text v, OutputCollector<Text, LongWritable> out, Reporter rep)
		throws IOException {
	// normalize and split the line
	String line = v.toString();
	String[] tokens = line.toLowerCase().split("\\W+");

	// emit the pairs
	for (String token : tokens) {
		if (token.length() > 0) {
			out.collect(new Text(token), new LongWritable(1L));
		}
	}
}
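The Flink-CEPplus examples execute such mapred-style functions inside Flink through the flink-hadoop-compatibility wrappers. As a sketch, assuming the mapper class above is named Tokenizer and that text is a DataSet<Tuple2<LongWritable, Text>> read via a Hadoop input format (both assumptions), it could be wired in with org.apache.flink.hadoopcompatibility.mapred.HadoopMapFunction:

// The wrapper adapts the mapred Mapper; pairs passed to collect() become Flink records.
DataSet<Tuple2<Text, LongWritable>> words =
		text.flatMap(new HadoopMapFunction<LongWritable, Text, Text, LongWritable>(new Tokenizer()));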
 
Example 10
Source File: LoadGeneratorMR.java    From hadoop with Apache License 2.0
@Override
public void reduce(Text key, Iterator<IntWritable> values,
    OutputCollector<Text, IntWritable> output, Reporter reporter)
    throws IOException {
  int sum = 0;
  while (values.hasNext()) {
    sum += values.next().get();
  }
  if (key.equals(OPEN_EXECTIME)){
    executionTime[OPEN] = sum;
  } else if (key.equals(NUMOPS_OPEN)){
    numOfOps[OPEN] = sum;
  } else if (key.equals(LIST_EXECTIME)){
    executionTime[LIST] = sum;
  } else if (key.equals(NUMOPS_LIST)){
    numOfOps[LIST] = sum;
  } else if (key.equals(DELETE_EXECTIME)){
    executionTime[DELETE] = sum;
  } else if (key.equals(NUMOPS_DELETE)){
    numOfOps[DELETE] = sum;
  } else if (key.equals(CREATE_EXECTIME)){
    executionTime[CREATE] = sum;
  } else if (key.equals(NUMOPS_CREATE)){
    numOfOps[CREATE] = sum;
  } else if (key.equals(WRITE_CLOSE_EXECTIME)){
    System.out.println(WRITE_CLOSE_EXECTIME + " = " + sum);
    executionTime[WRITE_CLOSE]= sum;
  } else if (key.equals(NUMOPS_WRITE_CLOSE)){
    numOfOps[WRITE_CLOSE] = sum;
  } else if (key.equals(TOTALOPS)){
    totalOps = sum;
  } else if (key.equals(ELAPSED_TIME)){
    totalTime = sum;
  }
  result.set(sum);
  output.collect(key, result);
  // System.out.println("Key = " + key + " Sum is =" + sum);
  // printResults(System.out);
}
 
Example 11
Source File: HadoopReduceCombineFunctionITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
public void reduce(IntWritable k, Iterator<IntWritable> v, OutputCollector<IntWritable, IntWritable> out, Reporter r)
		throws IOException {

	int sum = 0;
	while (v.hasNext()) {
		sum += v.next().get();
	}
	out.collect(k, new IntWritable(sum));
}
 
Example 12
Source File: FieldSelectionMapReduce.java    From hadoop with Apache License 2.0
public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output, Reporter reporter)
  throws IOException {
  String keyStr = key.toString() + this.fieldSeparator;
  while (values.hasNext()) {
    FieldSelectionHelper helper = new FieldSelectionHelper();
    helper.extractOutputKeyValue(keyStr, values.next().toString(),
        fieldSeparator, reduceOutputKeyFieldList,
        reduceOutputValueFieldList, allReduceValueFieldsFrom, false, false);
    output.collect(helper.getKey(), helper.getValue());
  }
}
 
Example 13
Source File: HadoopArchives.java    From hadoop with Apache License 2.0
public void map(LongWritable key, HarEntry value,
    OutputCollector<IntWritable, Text> out,
    Reporter reporter) throws IOException {
  Path relPath = new Path(value.path);
  int hash = HarFileSystem.getHarHash(relPath);
  String towrite = null;
  Path srcPath = realPath(relPath, rootPath);
  long startPos = partStream.getPos();
  FileSystem srcFs = srcPath.getFileSystem(conf);
  FileStatus srcStatus = srcFs.getFileStatus(srcPath);
  String propStr = encodeProperties(srcStatus);
  if (value.isDir()) { 
    towrite = encodeName(relPath.toString())
              + " dir " + propStr + " 0 0 ";
    StringBuffer sbuff = new StringBuffer();
    sbuff.append(towrite);
    for (String child: value.children) {
      sbuff.append(encodeName(child) + " ");
    }
    towrite = sbuff.toString();
    //reading directories is also progress
    reporter.progress();
  }
  else {
    FSDataInputStream input = srcFs.open(srcStatus.getPath());
    reporter.setStatus("Copying file " + srcStatus.getPath() + 
        " to archive.");
    copyData(srcStatus.getPath(), input, partStream, reporter);
    towrite = encodeName(relPath.toString())
              + " file " + partname + " " + startPos
              + " " + srcStatus.getLen() + " " + propStr + " ";
  }
  out.collect(new IntWritable(hash), new Text(towrite));
}
 
Example 14
Source File: ExternalMapperReducer.java    From hadoop with Apache License 2.0
public void reduce(WritableComparable key, Iterator<Writable> values,
                   OutputCollector<WritableComparable, IntWritable> output,
                   Reporter reporter)
  throws IOException {
  
  int count = 0;
  while (values.hasNext()) {
    count++;
    values.next();
  }
  output.collect(key, new IntWritable(count));
}
 
Example 15
Source File: SliveMapper.java    From hadoop with Apache License 2.0
/**
 * Runs the given operation and reports on its results
 * 
 * @param op
 *          the operation to run
 * @param reporter
 *          the status reporter to notify
 * @param output
 *          the output to write to
 * @throws IOException
 */
private void runOperation(Operation op, Reporter reporter,
    OutputCollector<Text, Text> output, long opNum) throws IOException {
  if (op == null) {
    return;
  }
  logAndSetStatus(reporter, "Running operation #" + opNum + " (" + op + ")");
  List<OperationOutput> opOut = op.run(filesystem);
  logAndSetStatus(reporter, "Finished operation #" + opNum + " (" + op + ")");
  if (opOut != null && !opOut.isEmpty()) {
    for (OperationOutput outData : opOut) {
      output.collect(outData.getKey(), outData.getOutputValue());
    }
  }
}
 
Example 16
Source File: HadoopMapredCompatWordCount.java    From Flink-CEPplus with Apache License 2.0
@Override
public void reduce(Text k, Iterator<LongWritable> vs, OutputCollector<Text, LongWritable> out, Reporter rep)
		throws IOException {

	long cnt = 0;
	while (vs.hasNext()) {
		cnt += vs.next().get();
	}
	out.collect(k, new LongWritable(cnt));

}
 
Example 17
Source File: IdentityMapper.java    From hadoop with Apache License 2.0
/** The identity function. The input key/value pair is written directly to
 * the output. */
public void map(K key, V val,
                OutputCollector<K, V> output, Reporter reporter)
  throws IOException {
  output.collect(key, val);
}
 
Example 18
Source File: RemoteParForUtils.java    From systemds with Apache License 2.0
/**
 * For remote MR parfor workers.
 * 
 * @param workerID worker id
 * @param vars local variable map
 * @param resultVars list of result variables
 * @param rvarFnames map of result variable names to output file names (may be null)
 * @param out output collector
 * @throws IOException if IOException occurs
 */
public static void exportResultVariables( long workerID, LocalVariableMap vars, ArrayList<ResultVar> resultVars, 
		HashMap<String,String> rvarFnames, OutputCollector<Writable, Writable> out ) throws IOException
{
	//create key and value for reuse
	LongWritable okey = new LongWritable( workerID ); 
	Text ovalue = new Text();
	
	//foreach result variables probe if export necessary
	for( ResultVar rvar : resultVars )
	{
		Data dat = vars.get( rvar._name );
		
		//export output variable to HDFS (see RunMRJobs)
		if ( dat != null && dat.getDataType() == DataType.MATRIX ) 
		{
			MatrixObject mo = (MatrixObject) dat;
			if( mo.isDirty() )
			{
				if( rvarFnames!=null ) {
					String fname = rvarFnames.get( rvar._name );
					if( fname!=null )
						mo.setFileName( fname );
						
					//export result var (iff actually modified in parfor)
					mo.exportData(); //note: this is equivalent to doing it in close (currently not required because 1 Task=1Map tasks, hence only one map invocation)		
					rvarFnames.put(rvar._name, mo.getFileName());
				}
				else {
					//export result var (iff actually modified in parfor)
					mo.exportData(); //note: this is equivalent to doing it in close (currently not required because 1 Task=1Map tasks, hence only one map invocation)
				}
				
				//pass output vars (scalars by value, matrix by ref) to result
				//(only if actually exported, hence in check for dirty, otherwise potential problems in result merge)
				String datStr = ProgramConverter.serializeDataObject(rvar._name, mo);
				ovalue.set( datStr );
				out.collect( okey, ovalue );
			}
		}
	}
}
 
Example 19
Source File: LoadGeneratorMR.java    From hadoop with Apache License 2.0
public void map(LongWritable key, Text value,
    OutputCollector<Text, IntWritable> output, Reporter reporter)
    throws IOException {
  ProgressThread progressThread = new ProgressThread(reporter);
  progressThread.start();
  try {
    new LoadGenerator(jobConf).generateLoadOnNN();
    System.out
        .println("Finished generating load on NN, sending results to the reducer");
    printResults(System.out);
    progressThread.keepGoing = false;
    progressThread.join();

    // Send results to Reducer
    output.collect(OPEN_EXECTIME,
        new IntWritable((int) executionTime[OPEN]));
    output.collect(NUMOPS_OPEN, new IntWritable((int) numOfOps[OPEN]));

    output.collect(LIST_EXECTIME,
        new IntWritable((int) executionTime[LIST]));
    output.collect(NUMOPS_LIST, new IntWritable((int) numOfOps[LIST]));

    output.collect(DELETE_EXECTIME, new IntWritable(
        (int) executionTime[DELETE]));
    output.collect(NUMOPS_DELETE, new IntWritable((int) numOfOps[DELETE]));

    output.collect(CREATE_EXECTIME, new IntWritable(
        (int) executionTime[CREATE]));
    output.collect(NUMOPS_CREATE, new IntWritable((int) numOfOps[CREATE]));

    output.collect(WRITE_CLOSE_EXECTIME, new IntWritable(
        (int) executionTime[WRITE_CLOSE]));
    output.collect(NUMOPS_WRITE_CLOSE, new IntWritable(
        (int) numOfOps[WRITE_CLOSE]));

    output.collect(TOTALOPS, new IntWritable((int) totalOps));
    output.collect(ELAPSED_TIME, new IntWritable((int) totalTime));

  } catch (InterruptedException e) {
    e.printStackTrace();
  }
}