org.apache.hadoop.mapred.Reporter Java Examples

The following examples show how to use org.apache.hadoop.mapred.Reporter. Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
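
Reporter is the old mapred API's channel back to the framework: tasks use it to update status strings, increment counters, and signal liveness. When no reporting is needed, for example when a RecordReader is opened outside a running task, the no-op singleton Reporter.NULL can be passed instead, as several examples below do. As a primer, here is a minimal, illustrative mapper; it is not from any of the projects listed, and the counter group and names are invented for the demo:

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class ReporterDemoMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, IntWritable> {

  private static final IntWritable ONE = new IntWritable(1);

  public void map(LongWritable key, Text value,
      OutputCollector<Text, IntWritable> output, Reporter reporter)
      throws IOException {
    // Counter group/name strings are arbitrary; these are invented for the demo.
    reporter.incrCounter("Demo", "LinesSeen", 1);

    // The status string is shown in the task UI.
    reporter.setStatus("processing offset " + key.get());

    // Heartbeat during long-running work so the task is not killed as hung.
    reporter.progress();

    output.collect(value, ONE);
  }
}
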
Example #1
Source File: InputSampler.java    From hadoop with Apache License 2.0
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, JobConf job) throws IOException {
  InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.length);
  int splitStep = splits.length / splitsToSample;
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    RecordReader<K,V> reader = inf.getRecordReader(splits[i * splitStep],
        job, Reporter.NULL);
    K key = reader.createKey();
    V value = reader.createValue();
    while (reader.next(key, value)) {
      samples.add(key);
      key = reader.createKey();
      ++records;
      if ((i+1) * samplesPerSplit <= records) {
        break;
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
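
For context, a sampler like this one is typically passed to InputSampler.writePartitionFile to compute split points for TotalOrderPartitioner. A rough sketch of that wiring, with invented sample counts and an invented partition-file path:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.InputSampler;
import org.apache.hadoop.mapred.lib.TotalOrderPartitioner;

// Illustrative only: 10000 samples over at most 10 splits; exception handling omitted.
JobConf job = new JobConf();
InputSampler.Sampler<Text, Text> sampler =
    new InputSampler.SplitSampler<Text, Text>(10000, 10); // numSamples, maxSplitsSampled
TotalOrderPartitioner.setPartitionFile(job, new Path("_partitions"));
job.setPartitionerClass(TotalOrderPartitioner.class);
InputSampler.writePartitionFile(job, sampler);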
 
Example #2
Source File: CleaningJob.java    From nutch-htmlunit with Apache License 2.0
@Override
public void reduce(ByteWritable key, Iterator<Text> values,
        OutputCollector<Text, ByteWritable> output, Reporter reporter)
        throws IOException {
    while (values.hasNext()) {
        Text document = values.next();
        writers.delete(document.toString());
        totalDeleted++;
        reporter.incrCounter("CleaningJobStatus", "Deleted documents",
                1);
        // if (numDeletes >= NUM_MAX_DELETE_REQUEST) {
        // LOG.info("CleaningJob: deleting " + numDeletes
        // + " documents");
        // // TODO updateRequest.process(solr);
        // // TODO updateRequest = new UpdateRequest();
        // writers.delete(key.toString());
        // totalDeleted += numDeletes;
        // numDeletes = 0;
        // }
    }
}
 
Example #3
Source File: ValueAggregatorCombiner.java    From big-c with Apache License 2.0
/** Combines values for a given key.  
 * @param key the key is expected to be a Text object whose prefix indicates
 * the type of aggregation to apply to the values.
 * @param values the values to combine
 * @param output to collect combined values
 */
public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
  String keyStr = key.toString();
  int pos = keyStr.indexOf(ValueAggregatorDescriptor.TYPE_SEPARATOR);
  String type = keyStr.substring(0, pos);
  ValueAggregator aggregator = ValueAggregatorBaseDescriptor
    .generateValueAggregator(type);
  while (values.hasNext()) {
    aggregator.addNextValue(values.next());
  }
  Iterator outputs = aggregator.getCombinerOutput().iterator();

  while (outputs.hasNext()) {
    Object v = outputs.next();
    if (v instanceof Text) {
      output.collect(key, (Text)v);
    } else {
      output.collect(key, new Text(v.toString()));
    }
  }
}
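
The key prefix convention used above comes from the aggregate framework: map tasks emit keys of the form <type><separator><id>, where the type (for example LongValueSum) selects which aggregator generateValueAggregator instantiates. A sketch of the emitting side, where "pageViews" is an invented identifier:

// "LongValueSum" is a built-in aggregator type; TYPE_SEPARATOR is ":".
Text aggKey = new Text("LongValueSum"
    + ValueAggregatorDescriptor.TYPE_SEPARATOR + "pageViews");
output.collect(aggKey, new Text("1"));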
 
Example #4
Source File: HiveKuduTableOutputFormat.java    From HiveKudu-Handler with Apache License 2.0
@Override
public void close(Reporter reporter) throws IOException {
    try {
        LOG.warn("I was called : close");
        processRowErrors(session.close());
        shutdownClient();
    } catch (Exception e) {
        throw new IOException("Encountered an error while closing this task", e);
    } finally {
        if (reporter != null) {
            // This is the only place where we have access to the context in the record writer,
            // so set the counter here.
            reporter.getCounter(Counters.ROWS_WITH_ERRORS).setValue(rowsWithErrors.get());
        }
    }
}
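
Counters here is an enum declared elsewhere in the handler; a minimal stand-in that would satisfy this code (the real definition may have more members):

// Hypothetical stand-in for the handler's counter enum.
public enum Counters {
  ROWS_WITH_ERRORS
}

Note that setValue overwrites the counter rather than incrementing it, which suits this set-once-at-close pattern.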
 
Example #5
Source File: WikiMetadata.java    From wikireverse with MIT License
public Hashtable<String, LinkWritable> createResults(Page page, Reporter reporter)
		throws IOException, JsonParseException, URISyntaxException {
	Hashtable<String, LinkWritable> results = new Hashtable<String, LinkWritable>();
	HashSet<String> linkUrls = page.getLinkUrls();
	
	if (linkUrls != null && !linkUrls.isEmpty()) {
		List<WikiArticle> articles = filterArticles(linkUrls, reporter);

		for (WikiArticle article : articles) {
			results.put(article.getKey(), new LinkWritable(article.getArticleName(),
					formatField(page.getTitle(), TITLE_LENGTH),
					page.getWarcDate(),
					page.getUrl()));
		}
	}
	
	return results;
}
 
Example #6
Source File: PipesReducer.java    From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked")
private void startApplication(OutputCollector<K3, V3> output, Reporter reporter) throws IOException {
  if (application == null) {
    try {
      LOG.info("starting application");
      application = 
        new Application<K2, V2, K3, V3>(
            job, null, output, reporter, 
            (Class<? extends K3>) job.getOutputKeyClass(), 
            (Class<? extends V3>) job.getOutputValueClass());
      downlink = application.getDownlink();
    } catch (InterruptedException ie) {
      throw new RuntimeException("interrupted", ie);
    }
    int reduce=0;
    downlink.runReduce(reduce, Submitter.getIsJavaRecordWriter(job));
  }
}
 
Example #7
Source File: TestDFSIO.java    From big-c with Apache License 2.0
@Override // IOMapperBase
public Long doIO(Reporter reporter, 
                   String name, 
                   long totalSize // in bytes
                 ) throws IOException {
  OutputStream out = (OutputStream)this.stream;
  // write to the file
  long nrRemaining;
  for (nrRemaining = totalSize; nrRemaining > 0; nrRemaining -= bufferSize) {
    int curSize = (bufferSize < nrRemaining) ? bufferSize : (int)nrRemaining;
    out.write(buffer, 0, curSize);
    reporter.setStatus("writing " + name + "@" + 
                       (totalSize - nrRemaining) + "/" + totalSize 
                       + " ::host = " + hostName);
  }
  return Long.valueOf(totalSize);
}
 
Example #8
Source File: DataJoinMapperBase.java    From hadoop-gpu with Apache License 2.0
public void map(Object key, Object value,
                OutputCollector output, Reporter reporter) throws IOException {
  if (this.reporter == null) {
    this.reporter = reporter;
  }
  addLongValue("totalCount", 1);
  TaggedMapOutput aRecord = generateTaggedMapOutput(value);
  if (aRecord == null) {
    addLongValue("discardedCount", 1);
    return;
  }
  Text groupKey = generateGroupKey(aRecord);
  if (groupKey == null) {
    addLongValue("nullGroupKeyCount", 1);
    return;
  }
  output.collect(groupKey, aRecord);
  addLongValue("collectedCount", 1);
}
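
The base class caches the Reporter in a field so that helpers such as addLongValue can be called without threading it through every method. The exact implementation lives in the data-join base class and may simply accumulate in memory; a hypothetical counter-backed version would look like:

// Hypothetical helper, not the actual base-class code; the group name "DataJoin" is invented.
protected void addLongValue(String name, long inc) {
  if (this.reporter != null) {
    this.reporter.incrCounter("DataJoin", name, inc);
  }
}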
 
Example #9
Source File: HadoopV1OutputCollector.java    From ignite with Apache License 2.0
/**
 * @param jobConf Job configuration.
 * @param taskCtx Task context.
 * @param directWrite Direct write flag.
 * @param fileName File name.
 * @throws IOException In case of IO exception.
 */
HadoopV1OutputCollector(JobConf jobConf, HadoopTaskContext taskCtx, boolean directWrite,
    @Nullable String fileName, TaskAttemptID attempt) throws IOException {
    this.jobConf = jobConf;
    this.taskCtx = taskCtx;
    this.attempt = attempt;

    if (directWrite) {
        jobConf.set("mapreduce.task.attempt.id", attempt.toString());

        OutputFormat outFormat = jobConf.getOutputFormat();

        writer = outFormat.getRecordWriter(null, jobConf, fileName, Reporter.NULL);
    }
    else
        writer = null;
}
 
Example #10
Source File: LRIdentificationModelHadoop.java    From semafor-semantic-parser with GNU General Public License v3.0
public String getBestFrame(String frameLine, String parseLine, Reporter reporter)
{
	String result = null;
	Set<String> set = mFrameMap.keySet();
	double maxVal = Double.NEGATIVE_INFINITY; // was -Double.MIN_VALUE, a tiny negative value rather than the true minimum
	for(String frame: set)
	{
		String[] toks = frameLine.split("\t");
		String newFrameLine = frame+"\t"+toks[1]+"\t"+toks[2];
		LogFormula formula = getNumeratorFormula(newFrameLine, parseLine, reporter);
		double val = formula.evaluate(this).exponentiate();
		if(val>maxVal)
		{
			maxVal = val;
			result=""+frame;
		}
		if(reporter!=null)
			reporter.setStatus("Considered "+frame+" for frameLine:"+frameLine);
		System.out.println("Considered "+frame+" for frameLine:"+frameLine);
	}
	return result;
}
 
Example #11
Source File: TestDBInputFormat.java    From big-c with Apache License 2.0
/**
 * test DBInputFormat class. Class should split result for chunks
 * @throws Exception
 */
@Test(timeout = 10000)
public void testDBInputFormat() throws Exception {
  JobConf configuration = new JobConf();
  setupDriver(configuration);
  
  DBInputFormat<NullDBWritable> format = new DBInputFormat<NullDBWritable>();
  format.setConf(configuration);
  format.setConf(configuration);
  DBInputFormat.DBInputSplit splitter = new DBInputFormat.DBInputSplit(1, 10);
  Reporter reporter = mock(Reporter.class);
  RecordReader<LongWritable, NullDBWritable> reader = format.getRecordReader(
      splitter, configuration, reporter);

  configuration.setInt(MRJobConfig.NUM_MAPS, 3);
  InputSplit[] lSplits = format.getSplits(configuration, 3);
  assertEquals(5, lSplits[0].getLength());
  assertEquals(3, lSplits.length);

  // test the reader with some simple assertions
  assertEquals(LongWritable.class, reader.createKey().getClass());
  assertEquals(0, reader.getPos());
  assertEquals(0, reader.getProgress(), 0.001);
  reader.close();
}
 
Example #12
Source File: ReaderTextLIBSVMParallel.java    From systemds with Apache License 2.0
@Override
public Object call() 
	throws Exception 
{
	RecordReader<LongWritable, Text> reader = _informat.getRecordReader(_split, _job, Reporter.NULL);
	LongWritable key = new LongWritable();
	Text oneLine = new Text();

	try {
		// count rows from the first row
		while (reader.next(key, oneLine)) {
			_nrows++;
		}
	} 
	catch (Exception e) {
		_rc = false;
		_errMsg = "RecordReader error in LIBSVM format, split: " + _split.toString() + ": " + e.getMessage();
		throw new IOException(_errMsg);
	} 
	finally {
		IOUtilFunctions.closeSilently(reader);
	}

	return null;
}
 
Example #13
Source File: RandomWriter.java    From hadoop-book with Apache License 2.0
/**
 * Given an output filename, write a bunch of random records to it.
 */
public void map(WritableComparable key,
        Writable value,
        OutputCollector<BytesWritable, BytesWritable> output,
        Reporter reporter) throws IOException {
    int itemCount = 0;
    while (numBytesToWrite > 0) {
        int keyLength = minKeySize
                + (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
        randomKey.setSize(keyLength);
        randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
        int valueLength = minValueSize
                + (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
        randomValue.setSize(valueLength);
        randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
        output.collect(randomKey, randomValue);
        numBytesToWrite -= keyLength + valueLength;
        reporter.incrCounter(Counters.BYTES_WRITTEN, keyLength + valueLength);
        reporter.incrCounter(Counters.RECORDS_WRITTEN, 1);
        if (++itemCount % 200 == 0) {
            reporter.setStatus("wrote record " + itemCount + ". "
                    + numBytesToWrite + " bytes left.");
        }
    }
    reporter.setStatus("done with " + itemCount + " records.");
}
 
Example #14
Source File: NodeDumper.java    From anthelion with Apache License 2.0
/**
 * Outputs the url with the appropriate number of inlinks, outlinks, or for
 * score.
 */
public void map(Text key, Node node,
  OutputCollector<FloatWritable, Text> output, Reporter reporter)
  throws IOException {

  float number = 0;
  if (inlinks) {
    number = node.getNumInlinks();
  }
  else if (outlinks) {
    number = node.getNumOutlinks();
  }
  else {
    number = node.getInlinkScore();
  }

  // number collected with negative to be descending
  output.collect(new FloatWritable(-number), key);
}
 
Example #15
Source File: StreamXmlRecordReader.java    From big-c with Apache License 2.0
public StreamXmlRecordReader(FSDataInputStream in, FileSplit split, Reporter reporter,
                             JobConf job, FileSystem fs) throws IOException {
  super(in, split, reporter, job, fs);

  beginMark_ = checkJobGet(CONF_NS + "begin");
  endMark_ = checkJobGet(CONF_NS + "end");

  maxRecSize_ = job_.getInt(CONF_NS + "maxrec", 50 * 1000);
  lookAhead_ = job_.getInt(CONF_NS + "lookahead", 2 * maxRecSize_);
  synched_ = false;

  slowMatch_ = job_.getBoolean(CONF_NS + "slowmatch", false);
  if (slowMatch_) {
    beginPat_ = makePatternCDataOrMark(beginMark_);
    endPat_ = makePatternCDataOrMark(endMark_);
  }
  init();
}
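
Assuming the usual streaming namespace (CONF_NS == "stream.recordreader."), the begin/end marks above are supplied through job configuration; the <page> markers here are illustrative:

// Illustrative configuration for the reader above.
JobConf job = new JobConf();
job.set("stream.recordreader.begin", "<page>");
job.set("stream.recordreader.end", "</page>");
job.setInt("stream.recordreader.maxrec", 50 * 1000); // optional; matches the default above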
 
Example #16
Source File: OptimizedDataJoinMapperBase.java    From hiped2 with Apache License 2.0
public void map(Object key, Object value,
                OutputCollector output, Reporter reporter) throws IOException {
  if (this.reporter == null) {
    this.reporter = reporter;
  }
  addLongValue("totalCount", 1);
  OutputValue aRecord = genMapOutputValue(value);
  if (aRecord == null) {
    addLongValue("discardedCount", 1);
    return;
  }
  aRecord.setSmaller(smaller);
  String groupKey = genGroupKey(key, aRecord);
  if (groupKey == null) {
    addLongValue("nullGroupKeyCount", 1);
    return;
  }
  outputKey.setKey(groupKey);
  output.collect(outputKey, aRecord);
  addLongValue("collectedCount", 1);
}
 
Example #17
Source File: LinkDbMerger.java    From anthelion with Apache License 2.0
public void reduce(Text key, Iterator<Inlinks> values,
    OutputCollector<Text, Inlinks> output, Reporter reporter) throws IOException {
  Inlinks result = new Inlinks();

  while (values.hasNext()) {
    Inlinks inlinks = values.next();

    int end = Math.min(maxInlinks - result.size(), inlinks.size());
    Iterator<Inlink> it = inlinks.iterator();
    int i = 0;
    while (it.hasNext() && i++ < end) {
      result.add(it.next());
    }
  }

  if (result.size() == 0) return;
  output.collect(key, result);
}
 
Example #18
Source File: HadoopOutputFormatTest.java    From flink with Apache License 2.0
@Test
public void testCloseWithoutTaskCommit() throws Exception {
	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	when(outputCommitter.needsTaskCommit(any(TaskAttemptContext.class))).thenReturn(false);
	DummyRecordWriter recordWriter = mock(DummyRecordWriter.class);
	JobConf jobConf = mock(JobConf.class);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
	outputFormat.recordWriter = recordWriter;
	outputFormat.outputCommitter = outputCommitter;

	outputFormat.close();

	verify(recordWriter, times(1)).close(any(Reporter.class));
	verify(outputCommitter, times(0)).commitTask(any(TaskAttemptContext.class));
}
 
Example #19
Source File: TokenCountMapper.java    From hadoop-gpu with Apache License 2.0
public void map(K key, Text value,
                OutputCollector<Text, LongWritable> output,
                Reporter reporter)
  throws IOException {
  // get input text
  String text = value.toString();       // value is line of text

  // tokenize the value
  StringTokenizer st = new StringTokenizer(text);
  while (st.hasMoreTokens()) {
    // output <token,1> pairs
    output.collect(new Text(st.nextToken()), new LongWritable(1));
  }  
}
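
This mapper pairs naturally with the library LongSumReducer, which sums the emitted LongWritable counts per token. A plausible job wiring (input/output paths omitted):

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.LongSumReducer;
import org.apache.hadoop.mapred.lib.TokenCountMapper;

JobConf conf = new JobConf();
conf.setJobName("token-count");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(LongWritable.class);
conf.setMapperClass(TokenCountMapper.class);
conf.setCombinerClass(LongSumReducer.class); // summing is associative, so a combiner is safe
conf.setReducerClass(LongSumReducer.class);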
 
Example #20
Source File: SegmentReader.java    From nutch-htmlunit with Apache License 2.0
public void map(WritableComparable<?> key, Writable value,
    OutputCollector<Text, NutchWritable> collector, Reporter reporter) throws IOException {
  // convert on the fly from old formats with UTF8 keys.
  // UTF8 deprecated and replaced by Text.
  if (key instanceof Text) {
    newKey.set(key.toString());
    key = newKey;
  }
  collector.collect((Text)key, new NutchWritable(value));
}
 
Example #21
Source File: RandomTextWriter.java    From RDFS with Apache License 2.0
/**
 * Given an output filename, write a bunch of random records to it.
 */
public void map(Text key, Text value,
                OutputCollector<Text, Text> output, 
                Reporter reporter) throws IOException {
  int itemCount = 0;
  while (numBytesToWrite > 0) {
    // Generate the key/value 
    int noWordsKey = minWordsInKey + 
      (wordsInKeyRange != 0 ? random.nextInt(wordsInKeyRange) : 0);
    int noWordsValue = minWordsInValue + 
      (wordsInValueRange != 0 ? random.nextInt(wordsInValueRange) : 0);
    Text keyWords = generateSentence(noWordsKey);
    Text valueWords = generateSentence(noWordsValue);
    
    // Write the sentence 
    output.collect(keyWords, valueWords);
    
    numBytesToWrite -= (keyWords.getLength() + valueWords.getLength());
    
    // Update counters, progress etc.
    reporter.incrCounter(Counters.BYTES_WRITTEN, 
                         (keyWords.getLength()+valueWords.getLength()));
    reporter.incrCounter(Counters.RECORDS_WRITTEN, 1);
    if (++itemCount % 200 == 0) {
      reporter.setStatus("wrote record " + itemCount + ". " + 
                         numBytesToWrite + " bytes left.");
    }
  }
  reporter.setStatus("done with " + itemCount + " records.");
}
 
Example #22
Source File: BusyAirports.java    From gemfirexd-oss with Apache License 2.0
@Override
public void reduce(Text token, Iterator<IntWritable> values,
    OutputCollector<Text, IntWritable> output,
    Reporter reporter) throws IOException {
  int sum = 0;

  while (values.hasNext()) {
    sum += values.next().get();
  }

  output.collect(token, new IntWritable(sum));
}
 
Example #23
Source File: TestTableSnapshotInputFormat.java    From hbase with Apache License 2.0
@Override
public void map(ImmutableBytesWritable key, Result value,
    OutputCollector<ImmutableBytesWritable, NullWritable> collector, Reporter reporter)
    throws IOException {
  verifyRowFromMap(key, value);
  collector.collect(key, NullWritable.get());
}
 
Example #24
Source File: FilterAlignments.java    From emr-sample-apps with Apache License 2.0
public synchronized void reduce(IntWritable readid, Iterator<BytesWritable> values,
  			             OutputCollector<IntWritable, BytesWritable> output, Reporter reporter) throws IOException 
{
	boolean recordBest = true;
	bestAlignment.fromBytes(values.next());
				
	while (values.hasNext()) 
	{
		curAlignment.fromBytes(values.next());
		
		if (curAlignment.m_differences < bestAlignment.m_differences)
		{
			bestAlignment.set(curAlignment);
			recordBest = true;
		}
		else if (curAlignment.m_differences == bestAlignment.m_differences)
		{
			recordBest = false;
		}
		else
		{
			// curAlignment is worse than best alignment, nothing to do
			
		}
	}
	
	if (recordBest)
	{
		output.collect(readid, bestAlignment.toBytes());
	}
}
 
Example #25
Source File: L11.java    From spork with Apache License 2.0
public void reduce(
        Text key,
        Iterator<Text> iter,
        OutputCollector<Text, Text> oc,
        Reporter reporter) throws IOException {

    // Just take the key and the first value.
    oc.collect(key, iter.next());
}
 
Example #26
Source File: PipeMapRed.java    From big-c with Apache License 2.0
void startOutputThreads(OutputCollector output, Reporter reporter) 
  throws IOException {
  inWriter_ = createInputWriter();
  outReader_ = createOutputReader();
  outThread_ = new MROutputThread(outReader_, output, reporter);
  outThread_.start();
  errThread_ = new MRErrorThread();
  errThread_.setReporter(reporter);
  errThread_.start();
}
 
Example #27
Source File: MneInputFormat.java    From mnemonic with Apache License 2.0
@Override
public RecordReader<NullWritable, MV>
getRecordReader(InputSplit inputSplit,
                JobConf jobConf,
                Reporter reporter) throws IOException {
  MneMapredRecordReader<MV, V> reader =
          new MneMapredRecordReader<MV, V>((FileSplit) inputSplit, jobConf);
  return reader;
}
 
Example #28
Source File: TestStreamingOutputKeyValueTypes.java    From big-c with Apache License 2.0
public void reduce(K key, Iterator<V> values,
    OutputCollector<LongWritable, Text> output, Reporter reporter)
    throws IOException {
  LongWritable l = new LongWritable();
  while (values.hasNext()) {
    output.collect(l, new Text(values.next().toString()));
  }
}
 
Example #29
Source File: ExportFileFlusherTest.java    From emr-dynamodb-connector with Apache License 2.0
@Test
public void close_whenRTE_thenConsecutiveSyncCallFails() throws IOException,
    InterruptedException {
  doThrow(new RuntimeException()).when(recordWriter).close(Reporter.NULL);
  flusher.close(recordWriter, Reporter.NULL);
  expectedException.expect(RuntimeException.class);
  flusher.sync();
}
 
Example #30
Source File: TestFetcher.java    From big-c with Apache License 2.0
public FakeFetcher(JobConf job, TaskAttemptID reduceId,
    ShuffleSchedulerImpl<K,V> scheduler, MergeManagerImpl<K,V> merger,
    Reporter reporter, ShuffleClientMetrics metrics,
    ExceptionReporter exceptionReporter, SecretKey jobTokenSecret,
    HttpURLConnection connection, int id) {
  super(job, reduceId, scheduler, merger, reporter, metrics,
      exceptionReporter, jobTokenSecret, id);
  this.connection = connection;
}