Java Code Examples for org.apache.hadoop.mapred.Reporter

The following examples show how to use org.apache.hadoop.mapred.Reporter. They are extracted from open source projects; where available, the source project, source file, and license are listed above each example.
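
Reporter is the old (mapred) API's channel for a task to report liveness, status strings, and counter updates back to the framework; the constant Reporter.NULL, which appears in several examples below, is a no-op implementation for places where no real reporter is available. Before the project examples, here is a minimal, hypothetical mapper (not taken from any of the projects below) sketching the most common Reporter calls; the class and counter names are made up for illustration.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical example: counts lines and flags empty ones via Reporter counters.
public class LineCountMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, LongWritable> {

  // Counters can be addressed by an enum constant or by (group, name) strings.
  private enum MyCounters { LINES }

  public void map(LongWritable key, Text value,
                  OutputCollector<Text, LongWritable> output, Reporter reporter)
      throws IOException {
    reporter.progress();                               // signal the framework that the task is alive
    reporter.incrCounter(MyCounters.LINES, 1);         // enum-based counter
    if (value.getLength() == 0) {
      reporter.incrCounter("stats", "emptyLines", 1);  // (group, name) counter
      return;
    }
    reporter.setStatus("processing offset " + key.get());  // human-readable task status
    output.collect(value, new LongWritable(1));
  }
}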
Example 1
Source Project: big-c   Source File: StreamXmlRecordReader.java    License: Apache License 2.0
public StreamXmlRecordReader(FSDataInputStream in, FileSplit split, Reporter reporter,
                             JobConf job, FileSystem fs) throws IOException {
  super(in, split, reporter, job, fs);

  beginMark_ = checkJobGet(CONF_NS + "begin");
  endMark_ = checkJobGet(CONF_NS + "end");

  maxRecSize_ = job_.getInt(CONF_NS + "maxrec", 50 * 1000);
  lookAhead_ = job_.getInt(CONF_NS + "lookahead", 2 * maxRecSize_);
  synched_ = false;

  slowMatch_ = job_.getBoolean(CONF_NS + "slowmatch", false);
  if (slowMatch_) {
    beginPat_ = makePatternCDataOrMark(beginMark_);
    endPat_ = makePatternCDataOrMark(endMark_);
  }
  init();
}
 
Example 2
public String getBestFrame(String frameLine, String parseLine, Reporter reporter)
{
	String result = null;
	Set<String> set = mFrameMap.keySet();
	double maxVal = -Double.MIN_VALUE;
	for(String frame: set)
	{
		String[] toks = frameLine.split("\t");
		String newFrameLine = frame+"\t"+toks[1]+"\t"+toks[2];
		LogFormula formula = getNumeratorFormula(newFrameLine, parseLine, reporter);
		double val = formula.evaluate(this).exponentiate();
		if(val>maxVal)
		{
			maxVal = val;
			result=""+frame;
		}
		if(reporter!=null)
			reporter.setStatus("Considered "+frame+" for frameLine:"+frameLine);
		System.out.println("Considered "+frame+" for frameLine:"+frameLine);
	}
	return result;
}
 
Example 3
Source Project: big-c   Source File: TestDFSIO.java    License: Apache License 2.0
@Override // IOMapperBase
public Long doIO(Reporter reporter, 
                   String name, 
                   long totalSize // in bytes
                 ) throws IOException {
  OutputStream out = (OutputStream)this.stream;
  // write to the file
  long nrRemaining;
  for (nrRemaining = totalSize; nrRemaining > 0; nrRemaining -= bufferSize) {
    int curSize = (bufferSize < nrRemaining) ? bufferSize : (int)nrRemaining;
    out.write(buffer, 0, curSize);
    reporter.setStatus("writing " + name + "@" + 
                       (totalSize - nrRemaining) + "/" + totalSize 
                       + " ::host = " + hostName);
  }
  return Long.valueOf(totalSize);
}
 
Example 4
Source Project: hadoop-gpu   Source File: PipesReducer.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
private void startApplication(OutputCollector<K3, V3> output, Reporter reporter) throws IOException {
  if (application == null) {
    try {
      LOG.info("starting application");
      application = 
        new Application<K2, V2, K3, V3>(
            job, null, output, reporter, 
            (Class<? extends K3>) job.getOutputKeyClass(), 
            (Class<? extends V3>) job.getOutputValueClass());
      downlink = application.getDownlink();
    } catch (InterruptedException ie) {
      throw new RuntimeException("interrupted", ie);
    }
    int reduce=0;
    downlink.runReduce(reduce, Submitter.getIsJavaRecordWriter(job));
  }
}
 
Example 5
@Override
public void close(Reporter reporter) throws IOException {
    try {
        LOG.warn("I was called : close");
        processRowErrors(session.close());
        shutdownClient();
    } catch (Exception e) {
        throw new IOException("Encountered an error while closing this task", e);
    } finally {
        if (reporter != null) {
            // This is the only place where we have access to the context in the record writer,
            // so set the counter here.
            reporter.getCounter(Counters.ROWS_WITH_ERRORS).setValue(rowsWithErrors.get());
        }
    }
}
 
Example 6
Source Project: nutch-htmlunit   Source File: CleaningJob.java    License: Apache License 2.0
@Override
public void reduce(ByteWritable key, Iterator<Text> values,
        OutputCollector<Text, ByteWritable> output, Reporter reporter)
        throws IOException {
    while (values.hasNext()) {
        Text document = values.next();
        writers.delete(document.toString());
        totalDeleted++;
        reporter.incrCounter("CleaningJobStatus", "Deleted documents",
                1);
        // if (numDeletes >= NUM_MAX_DELETE_REQUEST) {
        // LOG.info("CleaningJob: deleting " + numDeletes
        // + " documents");
        // // TODO updateRequest.process(solr);
        // // TODO updateRequest = new UpdateRequest();
        // writers.delete(key.toString());
        // totalDeleted += numDeletes;
        // numDeletes = 0;
        // }
    }
}
 
Example 7
Source Project: flink   Source File: HadoopOutputFormatTest.java    License: Apache License 2.0
@Test
public void testCloseWithoutTaskCommit() throws Exception {
	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	when(outputCommitter.needsTaskCommit(any(TaskAttemptContext.class))).thenReturn(false);
	DummyRecordWriter recordWriter = mock(DummyRecordWriter.class);
	JobConf jobConf = mock(JobConf.class);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
	outputFormat.recordWriter = recordWriter;
	outputFormat.outputCommitter = outputCommitter;

	outputFormat.close();

	verify(recordWriter, times(1)).close(any(Reporter.class));
	verify(outputCommitter, times(0)).commitTask(any(TaskAttemptContext.class));
}
 
Example 8
Source Project: hadoop   Source File: InputSampler.java    License: Apache License 2.0
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, JobConf job) throws IOException {
  InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.length);
  int splitStep = splits.length / splitsToSample;
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    RecordReader<K,V> reader = inf.getRecordReader(splits[i * splitStep],
        job, Reporter.NULL);
    K key = reader.createKey();
    V value = reader.createValue();
    while (reader.next(key, value)) {
      samples.add(key);
      key = reader.createKey();
      ++records;
      if ((i+1) * samplesPerSplit <= records) {
        break;
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
 
Example 9
Source Project: big-c   Source File: ValueAggregatorCombiner.java    License: Apache License 2.0
/** Combines values for a given key.  
 * @param key the key is expected to be a Text object, whose prefix indicates
 * the type of aggregation to aggregate the values. 
 * @param values the values to combine
 * @param output to collect combined values
 */
public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
  String keyStr = key.toString();
  int pos = keyStr.indexOf(ValueAggregatorDescriptor.TYPE_SEPARATOR);
  String type = keyStr.substring(0, pos);
  ValueAggregator aggregator = ValueAggregatorBaseDescriptor
    .generateValueAggregator(type);
  while (values.hasNext()) {
    aggregator.addNextValue(values.next());
  }
  Iterator outputs = aggregator.getCombinerOutput().iterator();

  while (outputs.hasNext()) {
    Object v = outputs.next();
    if (v instanceof Text) {
      output.collect(key, (Text)v);
    } else {
      output.collect(key, new Text(v.toString()));
    }
  }
}
 
Example 10
Source Project: wikireverse   Source File: WikiMetadata.java    License: MIT License
public Hashtable<String, LinkWritable> createResults(Page page, Reporter reporter)
		throws IOException, JsonParseException, URISyntaxException {
	Hashtable<String, LinkWritable> results = new Hashtable<String, LinkWritable>();
	HashSet<String> linkUrls = page.getLinkUrls();
	
	if (linkUrls != null && linkUrls.isEmpty() == false) {
		List<WikiArticle> articles = filterArticles(linkUrls, reporter);

		for (WikiArticle article : articles) {
		results.put(article.getKey(), new LinkWritable(article.getArticleName(),
														formatField(page.getTitle(), TITLE_LENGTH),
														page.getWarcDate(),
														page.getUrl()));
		}
	}
	
	return results;
}
 
Example 11
Source Project: hiped2   Source File: OptimizedDataJoinMapperBase.java    License: Apache License 2.0
public void map(Object key, Object value,
                OutputCollector output, Reporter reporter) throws IOException {
  if (this.reporter == null) {
    this.reporter = reporter;
  }
  addLongValue("totalCount", 1);
  OutputValue aRecord = genMapOutputValue(value);
  if (aRecord == null) {
    addLongValue("discardedCount", 1);
    return;
  }
  aRecord.setSmaller(smaller);
  String groupKey = genGroupKey(key, aRecord);
  if (groupKey == null) {
    addLongValue("nullGroupKeyCount", 1);
    return;
  }
  outputKey.setKey(groupKey);
  output.collect(outputKey, aRecord);
  addLongValue("collectedCount", 1);
}
 
Example 12
Source Project: anthelion   Source File: LinkDbMerger.java    License: Apache License 2.0
public void reduce(Text key, Iterator<Inlinks> values, OutputCollector<Text, Inlinks> output, Reporter reporter) throws IOException {

    Inlinks result = new Inlinks();

    while (values.hasNext()) {
      Inlinks inlinks = values.next();

      int end = Math.min(maxInlinks - result.size(), inlinks.size());
      Iterator<Inlink> it = inlinks.iterator();
      int i = 0;
      while(it.hasNext() && i++ < end) {
        result.add(it.next());
      }
    }
    if (result.size() == 0) return;
    output.collect(key, result);
    
  }
 
Example 13
Source Project: anthelion   Source File: NodeDumper.java    License: Apache License 2.0
/**
 * Outputs the url with the appropriate number of inlinks, outlinks, or for
 * score.
 */
public void map(Text key, Node node,
  OutputCollector<FloatWritable, Text> output, Reporter reporter)
  throws IOException {

  float number = 0;
  if (inlinks) {
    number = node.getNumInlinks();
  }
  else if (outlinks) {
    number = node.getNumOutlinks();
  }
  else {
    number = node.getInlinkScore();
  }

  // number collected with negative to be descending
  output.collect(new FloatWritable(-number), key);
}
 
Example 14
Source Project: big-c   Source File: TestDBInputFormat.java    License: Apache License 2.0
/**
 * test DBInputFormat class. Class should split result for chunks
 * @throws Exception
 */
@Test(timeout = 10000)
public void testDBInputFormat() throws Exception {
  JobConf configuration = new JobConf();
  setupDriver(configuration);
  
  DBInputFormat<NullDBWritable> format = new DBInputFormat<NullDBWritable>();
  format.setConf(configuration);
  format.setConf(configuration);
  DBInputFormat.DBInputSplit splitter = new DBInputFormat.DBInputSplit(1, 10);
  Reporter reporter = mock(Reporter.class);
  RecordReader<LongWritable, NullDBWritable> reader = format.getRecordReader(
      splitter, configuration, reporter);

  configuration.setInt(MRJobConfig.NUM_MAPS, 3);
  InputSplit[] lSplits = format.getSplits(configuration, 3);
  assertEquals(5, lSplits[0].getLength());
  assertEquals(3, lSplits.length);

  // test reader. Some simple tests
  assertEquals(LongWritable.class, reader.createKey().getClass());
  assertEquals(0, reader.getPos());
  assertEquals(0, reader.getProgress(), 0.001);
  reader.close();
}
 
Example 15
Source Project: hadoop-gpu   Source File: DataJoinMapperBase.java    License: Apache License 2.0
public void map(Object key, Object value,
                OutputCollector output, Reporter reporter) throws IOException {
  if (this.reporter == null) {
    this.reporter = reporter;
  }
  addLongValue("totalCount", 1);
  TaggedMapOutput aRecord = generateTaggedMapOutput(value);
  if (aRecord == null) {
    addLongValue("discardedCount", 1);
    return;
  }
  Text groupKey = generateGroupKey(aRecord);
  if (groupKey == null) {
    addLongValue("nullGroupKeyCount", 1);
    return;
  }
  output.collect(groupKey, aRecord);
  addLongValue("collectedCount", 1);
}
 
Example 16
Source Project: systemds   Source File: ReaderTextLIBSVMParallel.java    License: Apache License 2.0
@Override
public Object call() 
	throws Exception 
{
	RecordReader<LongWritable, Text> reader = _informat.getRecordReader(_split, _job, Reporter.NULL);
	LongWritable key = new LongWritable();
	Text oneLine = new Text();

	try {
		// count rows from the first row
		while (reader.next(key, oneLine)) {
			_nrows++;
		}
	} 
	catch (Exception e) {
		_rc = false;
		_errMsg = "RecordReader error libsvm format. split: "+ _split.toString() + e.getMessage();
		throw new IOException(_errMsg);
	} 
	finally {
		IOUtilFunctions.closeSilently(reader);
	}

	return null;
}
 
Example 17
Source Project: ignite   Source File: HadoopV1OutputCollector.java    License: Apache License 2.0
/**
 * @param jobConf Job configuration.
 * @param taskCtx Task context.
 * @param directWrite Direct write flag.
 * @param fileName File name.
 * @throws IOException In case of IO exception.
 */
HadoopV1OutputCollector(JobConf jobConf, HadoopTaskContext taskCtx, boolean directWrite,
    @Nullable String fileName, TaskAttemptID attempt) throws IOException {
    this.jobConf = jobConf;
    this.taskCtx = taskCtx;
    this.attempt = attempt;

    if (directWrite) {
        jobConf.set("mapreduce.task.attempt.id", attempt.toString());

        OutputFormat outFormat = jobConf.getOutputFormat();

        writer = outFormat.getRecordWriter(null, jobConf, fileName, Reporter.NULL);
    }
    else
        writer = null;
}
 
Example 18
Source Project: hadoop-book   Source File: RandomWriter.java    License: Apache License 2.0
/**
 * Given an output filename, write a bunch of random records to it.
 */
public void map(WritableComparable key,
        Writable value,
        OutputCollector<BytesWritable, BytesWritable> output,
        Reporter reporter) throws IOException {
    int itemCount = 0;
    while (numBytesToWrite > 0) {
        int keyLength = minKeySize
                + (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
        randomKey.setSize(keyLength);
        randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
        int valueLength = minValueSize
                + (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
        randomValue.setSize(valueLength);
        randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
        output.collect(randomKey, randomValue);
        numBytesToWrite -= keyLength + valueLength;
        reporter.incrCounter(Counters.BYTES_WRITTEN, keyLength + valueLength);
        reporter.incrCounter(Counters.RECORDS_WRITTEN, 1);
        if (++itemCount % 200 == 0) {
            reporter.setStatus("wrote record " + itemCount + ". "
                    + numBytesToWrite + " bytes left.");
        }
    }
    reporter.setStatus("done with " + itemCount + " records.");
}
 
Example 19
Source Project: RDFS   Source File: CompositeInputFormat.java    License: Apache License 2.0
/**
 * Construct a CompositeRecordReader for the children of this InputFormat
 * as defined in the init expression.
 * The outermost join need only be composable, not necessarily a composite.
 * Mandating TupleWritable isn't strictly correct.
 */
@SuppressWarnings("unchecked") // child types unknown
public ComposableRecordReader<K,TupleWritable> getRecordReader(
    InputSplit split, JobConf job, Reporter reporter) throws IOException {
  setFormat(job);
  return root.getRecordReader(split, job, reporter);
}
 
Example 20
Source Project: hadoop-solr   Source File: RegexIngestMapper.java    License: Apache License 2.0
@Override
public LWDocument[] toDocuments(Writable key, Writable value, Reporter reporter,
    Configuration conf) throws IOException {
  if (key != null && value != null) {
    LWDocument doc = createDocument(key.toString() + "-" + System.currentTimeMillis(), null);
    Matcher matcher = regex.matcher(value.toString());
    if (matcher != null) {
      if (match) {
        if (matcher.matches()) {
          processMatch(doc, matcher);
        }
      } else {//
        while (matcher.find()) {
          processMatch(doc, matcher);
          reporter.progress();//do we really even need this?
        }
      }
    }
    // Adding the file path where this record was taken
    FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
    String originalLogFilePath = fileSplit.getPath().toUri().getPath();
    doc.addField(FIELD_PATH, originalLogFilePath);
    String docId = originalLogFilePath + "-" + doc.getId();
    doc.setId(docId);
    return new LWDocument[] {doc};
  }
  return null;
}
 
Example 21
Source Project: hbase   Source File: TestTableMapReduce.java    License: Apache License 2.0
/**
 * Pass the key, and reversed value to reduce
 */
public void map(ImmutableBytesWritable key, Result value,
  OutputCollector<ImmutableBytesWritable, Put> output,
  Reporter reporter)
throws IOException {
  output.collect(key, TestTableMapReduceBase.map(key, value));
}
 
Example 22
Source Project: RDFS   Source File: RegexMapper.java    License: Apache License 2.0
public void map(K key, Text value,
                OutputCollector<Text, LongWritable> output,
                Reporter reporter)
  throws IOException {
  String text = value.toString();
  Matcher matcher = pattern.matcher(text);
  while (matcher.find()) {
    output.collect(new Text(matcher.group(group)), new LongWritable(1));
  }
}
 
Example 23
Source Project: tez   Source File: TezGroupedSplitsInputFormat.java    License: Apache License 2.0
public TezGroupedSplitsRecordReader(TezGroupedSplit split, JobConf job,
    Reporter reporter) throws IOException {
  this.groupedSplit = split;
  this.job = job;
  this.reporter = reporter;
  initNextRecordReader();
}
 
Example 24
Source Project: anthelion   Source File: CrawlDbReader.java    License: Apache License 2.0
public void map(Text key, CrawlDatum value, OutputCollector<Text, LongWritable> output, Reporter reporter)
        throws IOException {
  output.collect(new Text("T"), COUNT_1);
  output.collect(new Text("status " + value.getStatus()), COUNT_1);
  output.collect(new Text("retry " + value.getRetriesSinceFetch()), COUNT_1);
  output.collect(new Text("s"), new LongWritable((long) (value.getScore() * 1000.0)));
  if(sort){
    URL u = new URL(key.toString());
    String host = u.getHost();
    output.collect(new Text("status " + value.getStatus() + " " + host), COUNT_1);
  }
}
 
Example 25
Source Project: hadoop-gpu   Source File: DBInputFormat.java    License: Apache License 2.0
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
public RecordReader<LongWritable, T> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {

  Class inputClass = dbConf.getInputClass();
  try {
    return new DBRecordReader((DBInputSplit) split, inputClass, job);
  }
  catch (SQLException ex) {
    throw new IOException(ex.getMessage());
  }
}
 
Example 26
Source Project: Flink-CEPplus   Source File: HadoopReduceFunctionITCase.java    License: Apache License 2.0
@Override
public void reduce(IntWritable k, Iterator<Text> vs, OutputCollector<IntWritable, IntWritable> out, Reporter r)
		throws IOException {
	int commentCnt = 0;
	while (vs.hasNext()) {
		String v = vs.next().toString();
		if (v.startsWith("Comment")) {
			commentCnt++;
		}
	}
	out.collect(k, new IntWritable(commentCnt));
}
 
Example 27
Source Project: Flink-CEPplus   Source File: HadoopMapFunctionITCase.java    License: Apache License 2.0
@Override
public void map(final IntWritable k, final Text v,
		final OutputCollector<IntWritable, Text> out, final Reporter r) throws IOException {
	if (v.toString().contains("bananas")) {
		out.collect(k, v);
	}
}
 
Example 28
Source Project: Flink-CEPplus   Source File: HadoopMapFunctionITCase.java    License: Apache License 2.0
@Override
public void map(IntWritable k, Text v, OutputCollector<IntWritable, Text> out, Reporter r)
		throws IOException {
	if (v.toString().startsWith(filterPrefix)) {
		out.collect(k, v);
	}
}
 
Example 29
Source Project: hadoop   Source File: DBInputFormat.java    License: Apache License 2.0
/** {@inheritDoc} */
public RecordReader<LongWritable, T> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {

  // wrap the DBRR in a shim class to deal with API differences.
  return new DBRecordReaderWrapper<T>(
      (org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T>) 
      createDBRecordReader(
        (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit) split, job));
}
 
Example 30
Source Project: RDFS   Source File: PipesReducer.java    License: Apache License 2.0
/**
 * Handle the end of the input by closing down the application.
 */
public void close() throws IOException {
  // if we haven't started the application, we have nothing to do
  if (isOk) {
    OutputCollector<K3, V3> nullCollector = new OutputCollector<K3, V3>() {
      public void collect(K3 key, 
                          V3 value) throws IOException {
        // NULL
      }
    };
    startApplication(nullCollector, Reporter.NULL);
  }
  try {
    if (isOk) {
      application.getDownlink().endOfInput();
    } else {
      // send the abort to the application and let it clean up
      application.getDownlink().abort();
    }
    LOG.info("waiting for finish");
    application.waitForFinish();
    LOG.info("got done");
  } catch (Throwable t) {
    application.abort(t);
  } finally {
    application.cleanup();
  }
}