org.apache.hadoop.mapred.Reporter Java Examples

The following examples show how to use org.apache.hadoop.mapred.Reporter. Each example is taken from an open-source project; you can go to the original project or source file by following the links above each example.
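Before the project examples, here is a minimal sketch of how a Reporter is typically used inside an old-API map task to signal liveness, update the task status string, and increment counters. The mapper class and the counter group/name below are hypothetical; only the Reporter calls (progress, setStatus, incrCounter) come from the org.apache.hadoop.mapred API.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical mapper used only to illustrate the Reporter API.
public class LineCountMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, LongWritable> {

  @Override
  public void map(LongWritable key, Text value,
                  OutputCollector<Text, LongWritable> output,
                  Reporter reporter) throws IOException {
    // Tell the framework this task is still alive (important in long-running loops).
    reporter.progress();

    // Human-readable status shown in the job UI.
    reporter.setStatus("processing offset " + key.get());

    // Increment a custom counter identified by group and counter name (both hypothetical).
    reporter.incrCounter("LineCountMapper", "LINES_SEEN", 1);

    output.collect(new Text("lines"), new LongWritable(1));
  }
}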
Example #1
Source Project: big-c   Author: yncxcw   File: StreamXmlRecordReader.java    License: Apache License 2.0
public StreamXmlRecordReader(FSDataInputStream in, FileSplit split, Reporter reporter,
                             JobConf job, FileSystem fs) throws IOException {
  super(in, split, reporter, job, fs);

  beginMark_ = checkJobGet(CONF_NS + "begin");
  endMark_ = checkJobGet(CONF_NS + "end");

  maxRecSize_ = job_.getInt(CONF_NS + "maxrec", 50 * 1000);
  lookAhead_ = job_.getInt(CONF_NS + "lookahead", 2 * maxRecSize_);
  synched_ = false;

  slowMatch_ = job_.getBoolean(CONF_NS + "slowmatch", false);
  if (slowMatch_) {
    beginPat_ = makePatternCDataOrMark(beginMark_);
    endPat_ = makePatternCDataOrMark(endMark_);
  }
  init();
}
 
Example #2
Source Project: semafor-semantic-parser   Author: Noahs-ARK   File: LRIdentificationModelHadoop.java    License: GNU General Public License v3.0
public String getBestFrame(String frameLine, String parseLine, Reporter reporter)
{
	String result = null;
	Set<String> set = mFrameMap.keySet();
	double maxVal = -Double.MIN_VALUE;
	for(String frame: set)
	{
		String[] toks = frameLine.split("\t");
		String newFrameLine = frame+"\t"+toks[1]+"\t"+toks[2];
		LogFormula formula = getNumeratorFormula(newFrameLine, parseLine, reporter);
		double val = formula.evaluate(this).exponentiate();
		if(val>maxVal)
		{
			maxVal = val;
			result=""+frame;
		}
		if(reporter!=null)
			reporter.setStatus("Considered "+frame+" for frameLine:"+frameLine);
		System.out.println("Considered "+frame+" for frameLine:"+frameLine);
	}
	return result;
}
 
Example #3
Source Project: big-c   Author: yncxcw   File: TestDFSIO.java    License: Apache License 2.0
@Override // IOMapperBase
public Long doIO(Reporter reporter, 
                   String name, 
                   long totalSize // in bytes
                 ) throws IOException {
  OutputStream out = (OutputStream)this.stream;
  // write to the file
  long nrRemaining;
  for (nrRemaining = totalSize; nrRemaining > 0; nrRemaining -= bufferSize) {
    int curSize = (bufferSize < nrRemaining) ? bufferSize : (int)nrRemaining;
    out.write(buffer, 0, curSize);
    reporter.setStatus("writing " + name + "@" + 
                       (totalSize - nrRemaining) + "/" + totalSize 
                       + " ::host = " + hostName);
  }
  return Long.valueOf(totalSize);
}
 
Example #4
Source Project: hadoop-gpu   Author: koichi626   File: PipesReducer.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
private void startApplication(OutputCollector<K3, V3> output, Reporter reporter) throws IOException {
  if (application == null) {
    try {
      LOG.info("starting application");
      application = 
        new Application<K2, V2, K3, V3>(
            job, null, output, reporter, 
            (Class<? extends K3>) job.getOutputKeyClass(), 
            (Class<? extends V3>) job.getOutputValueClass());
      downlink = application.getDownlink();
    } catch (InterruptedException ie) {
      throw new RuntimeException("interrupted", ie);
    }
    int reduce=0;
    downlink.runReduce(reduce, Submitter.getIsJavaRecordWriter(job));
  }
}
 
Example #5
Source Project: HiveKudu-Handler   Author: BimalTandel   File: HiveKuduTableOutputFormat.java    License: Apache License 2.0
@Override
public void close(Reporter reporter) throws IOException {
    try {
        LOG.warn("I was called : close");
        processRowErrors(session.close());
        shutdownClient();
    } catch (Exception e) {
        throw new IOException("Encountered an error while closing this task", e);
    } finally {
        if (reporter != null) {
            // This is the only place where we have access to the context in the record writer,
            // so set the counter here.
            reporter.getCounter(Counters.ROWS_WITH_ERRORS).setValue(rowsWithErrors.get());
        }
    }
}
 
Example #6
Source Project: nutch-htmlunit   Author: xautlx   File: CleaningJob.java    License: Apache License 2.0
@Override
public void reduce(ByteWritable key, Iterator<Text> values,
        OutputCollector<Text, ByteWritable> output, Reporter reporter)
        throws IOException {
    while (values.hasNext()) {
        Text document = values.next();
        writers.delete(document.toString());
        totalDeleted++;
        reporter.incrCounter("CleaningJobStatus", "Deleted documents",
                1);
        // if (numDeletes >= NUM_MAX_DELETE_REQUEST) {
        // LOG.info("CleaningJob: deleting " + numDeletes
        // + " documents");
        // // TODO updateRequest.process(solr);
        // // TODO updateRequest = new UpdateRequest();
        // writers.delete(key.toString());
        // totalDeleted += numDeletes;
        // numDeletes = 0;
        // }
    }
}
 
Example #7
Source Project: flink   Author: apache   File: HadoopOutputFormatTest.java    License: Apache License 2.0
@Test
public void testCloseWithoutTaskCommit() throws Exception {
	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	when(outputCommitter.needsTaskCommit(any(TaskAttemptContext.class))).thenReturn(false);
	DummyRecordWriter recordWriter = mock(DummyRecordWriter.class);
	JobConf jobConf = mock(JobConf.class);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
	outputFormat.recordWriter = recordWriter;
	outputFormat.outputCommitter = outputCommitter;

	outputFormat.close();

	verify(recordWriter, times(1)).close(any(Reporter.class));
	verify(outputCommitter, times(0)).commitTask(any(TaskAttemptContext.class));
}
 
Example #8
Source Project: hadoop   Author: naver   File: InputSampler.java    License: Apache License 2.0
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, JobConf job) throws IOException {
  InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.length);
  int splitStep = splits.length / splitsToSample;
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    RecordReader<K,V> reader = inf.getRecordReader(splits[i * splitStep],
        job, Reporter.NULL);
    K key = reader.createKey();
    V value = reader.createValue();
    while (reader.next(key, value)) {
      samples.add(key);
      key = reader.createKey();
      ++records;
      if ((i+1) * samplesPerSplit <= records) {
        break;
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
 
Example #9
Source Project: big-c   Author: yncxcw   File: ValueAggregatorCombiner.java    License: Apache License 2.0
/** Combines values for a given key.  
 * @param key the key is expected to be a Text object whose prefix indicates
 * the type of aggregation to apply to the values.
 * @param values the values to combine
 * @param output to collect combined values
 */
public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
  String keyStr = key.toString();
  int pos = keyStr.indexOf(ValueAggregatorDescriptor.TYPE_SEPARATOR);
  String type = keyStr.substring(0, pos);
  ValueAggregator aggregator = ValueAggregatorBaseDescriptor
    .generateValueAggregator(type);
  while (values.hasNext()) {
    aggregator.addNextValue(values.next());
  }
  Iterator outputs = aggregator.getCombinerOutput().iterator();

  while (outputs.hasNext()) {
    Object v = outputs.next();
    if (v instanceof Text) {
      output.collect(key, (Text)v);
    } else {
      output.collect(key, new Text(v.toString()));
    }
  }
}
 
Example #10
Source Project: wikireverse   Author: rossf7   File: WikiMetadata.java    License: MIT License
public Hashtable<String, LinkWritable> createResults(Page page, Reporter reporter)
		throws IOException, JsonParseException, URISyntaxException {
	Hashtable<String, LinkWritable> results = new Hashtable<String, LinkWritable>();
	HashSet<String> linkUrls = page.getLinkUrls();
	
	if (linkUrls != null && linkUrls.isEmpty() == false) {
		List<WikiArticle> articles = filterArticles(linkUrls, reporter);

		for (WikiArticle article : articles) {
			results.put(article.getKey(), new LinkWritable(article.getArticleName(),
					formatField(page.getTitle(), TITLE_LENGTH),
					page.getWarcDate(),
					page.getUrl()));
		}
	}
	
	return results;
}
 
Example #11
Source Project: hiped2   Author: alexholmes   File: OptimizedDataJoinMapperBase.java    License: Apache License 2.0
public void map(Object key, Object value,
                OutputCollector output, Reporter reporter) throws IOException {
  if (this.reporter == null) {
    this.reporter = reporter;
  }
  addLongValue("totalCount", 1);
  OutputValue aRecord = genMapOutputValue(value);
  if (aRecord == null) {
    addLongValue("discardedCount", 1);
    return;
  }
  aRecord.setSmaller(smaller);
  String groupKey = genGroupKey(key, aRecord);
  if (groupKey == null) {
    addLongValue("nullGroupKeyCount", 1);
    return;
  }
  outputKey.setKey(groupKey);
  output.collect(outputKey, aRecord);
  addLongValue("collectedCount", 1);
}
 
Example #12
Source Project: anthelion   Author: YahooArchive   File: LinkDbMerger.java    License: Apache License 2.0
public void reduce(Text key, Iterator<Inlinks> values, OutputCollector<Text, Inlinks> output, Reporter reporter) throws IOException {

    Inlinks result = new Inlinks();

    while (values.hasNext()) {
      Inlinks inlinks = values.next();

      int end = Math.min(maxInlinks - result.size(), inlinks.size());
      Iterator<Inlink> it = inlinks.iterator();
      int i = 0;
      while(it.hasNext() && i++ < end) {
        result.add(it.next());
      }
    }
    if (result.size() == 0) return;
    output.collect(key, result);
    
  }
 
Example #13
Source Project: anthelion   Author: YahooArchive   File: NodeDumper.java    License: Apache License 2.0
/**
 * Outputs the url with its number of inlinks or outlinks, or with its
 * score.
 */
public void map(Text key, Node node,
  OutputCollector<FloatWritable, Text> output, Reporter reporter)
  throws IOException {

  float number = 0;
  if (inlinks) {
    number = node.getNumInlinks();
  }
  else if (outlinks) {
    number = node.getNumOutlinks();
  }
  else {
    number = node.getInlinkScore();
  }

  // number collected with negative to be descending
  output.collect(new FloatWritable(-number), key);
}
 
Example #14
Source Project: big-c   Author: yncxcw   File: TestDBInputFormat.java    License: Apache License 2.0
/**
 * Tests the DBInputFormat class. The class should split the result into chunks.
 * @throws Exception
 */
@Test(timeout = 10000)
public void testDBInputFormat() throws Exception {
  JobConf configuration = new JobConf();
  setupDriver(configuration);
  
  DBInputFormat<NullDBWritable> format = new DBInputFormat<NullDBWritable>();
  format.setConf(configuration);
  format.setConf(configuration);
  DBInputFormat.DBInputSplit splitter = new DBInputFormat.DBInputSplit(1, 10);
  Reporter reporter = mock(Reporter.class);
  RecordReader<LongWritable, NullDBWritable> reader = format.getRecordReader(
      splitter, configuration, reporter);

  configuration.setInt(MRJobConfig.NUM_MAPS, 3);
  InputSplit[] lSplits = format.getSplits(configuration, 3);
  assertEquals(5, lSplits[0].getLength());
  assertEquals(3, lSplits.length);

  // test reader. Some simple tests
  assertEquals(LongWritable.class, reader.createKey().getClass());
  assertEquals(0, reader.getPos());
  assertEquals(0, reader.getProgress(), 0.001);
  reader.close();
}
 
Example #15
Source Project: hadoop-gpu   Author: koichi626   File: DataJoinMapperBase.java    License: Apache License 2.0
public void map(Object key, Object value,
                OutputCollector output, Reporter reporter) throws IOException {
  if (this.reporter == null) {
    this.reporter = reporter;
  }
  addLongValue("totalCount", 1);
  TaggedMapOutput aRecord = generateTaggedMapOutput(value);
  if (aRecord == null) {
    addLongValue("discardedCount", 1);
    return;
  }
  Text groupKey = generateGroupKey(aRecord);
  if (groupKey == null) {
    addLongValue("nullGroupKeyCount", 1);
    return;
  }
  output.collect(groupKey, aRecord);
  addLongValue("collectedCount", 1);
}
 
Example #16
Source Project: systemds   Author: tugraz-isds   File: ReaderTextLIBSVMParallel.java    License: Apache License 2.0
@Override
public Object call() 
	throws Exception 
{
	RecordReader<LongWritable, Text> reader = _informat.getRecordReader(_split, _job, Reporter.NULL);
	LongWritable key = new LongWritable();
	Text oneLine = new Text();

	try {
		// count rows from the first row
		while (reader.next(key, oneLine)) {
			_nrows++;
		}
	} 
	catch (Exception e) {
		_rc = false;
		_errMsg = "RecordReader error libsvm format. split: "+ _split.toString() + e.getMessage();
		throw new IOException(_errMsg);
	} 
	finally {
		IOUtilFunctions.closeSilently(reader);
	}

	return null;
}
 
Example #17
Source Project: ignite   Author: apache   File: HadoopV1OutputCollector.java    License: Apache License 2.0
/**
 * @param jobConf Job configuration.
 * @param taskCtx Task context.
 * @param directWrite Direct write flag.
 * @param fileName File name.
 * @param attempt Task attempt ID.
 * @throws IOException In case of IO exception.
 */
HadoopV1OutputCollector(JobConf jobConf, HadoopTaskContext taskCtx, boolean directWrite,
    @Nullable String fileName, TaskAttemptID attempt) throws IOException {
    this.jobConf = jobConf;
    this.taskCtx = taskCtx;
    this.attempt = attempt;

    if (directWrite) {
        jobConf.set("mapreduce.task.attempt.id", attempt.toString());

        OutputFormat outFormat = jobConf.getOutputFormat();

        writer = outFormat.getRecordWriter(null, jobConf, fileName, Reporter.NULL);
    }
    else
        writer = null;
}
 
Example #18
Source Project: hadoop-book   Author: elephantscale   File: RandomWriter.java    License: Apache License 2.0
/**
 * Given an output filename, write a bunch of random records to it.
 */
public void map(WritableComparable key,
        Writable value,
        OutputCollector<BytesWritable, BytesWritable> output,
        Reporter reporter) throws IOException {
    int itemCount = 0;
    while (numBytesToWrite > 0) {
        int keyLength = minKeySize
                + (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
        randomKey.setSize(keyLength);
        randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
        int valueLength = minValueSize
                + (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
        randomValue.setSize(valueLength);
        randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
        output.collect(randomKey, randomValue);
        numBytesToWrite -= keyLength + valueLength;
        reporter.incrCounter(Counters.BYTES_WRITTEN, keyLength + valueLength);
        reporter.incrCounter(Counters.RECORDS_WRITTEN, 1);
        if (++itemCount % 200 == 0) {
            reporter.setStatus("wrote record " + itemCount + ". "
                    + numBytesToWrite + " bytes left.");
        }
    }
    reporter.setStatus("done with " + itemCount + " records.");
}
 
Example #19
Source Project: RDFS   Author: iVCE   File: CompositeInputFormat.java    License: Apache License 2.0
/**
 * Construct a CompositeRecordReader for the children of this InputFormat
 * as defined in the init expression.
 * The outermost join need only be composable, not necessarily a composite.
 * Mandating TupleWritable isn't strictly correct.
 */
@SuppressWarnings("unchecked") // child types unknown
public ComposableRecordReader<K,TupleWritable> getRecordReader(
    InputSplit split, JobConf job, Reporter reporter) throws IOException {
  setFormat(job);
  return root.getRecordReader(split, job, reporter);
}
 
Example #20
Source Project: hadoop-solr   Author: lucidworks   File: RegexIngestMapper.java    License: Apache License 2.0
@Override
public LWDocument[] toDocuments(Writable key, Writable value, Reporter reporter,
    Configuration conf) throws IOException {
  if (key != null && value != null) {
    LWDocument doc = createDocument(key.toString() + "-" + System.currentTimeMillis(), null);
    Matcher matcher = regex.matcher(value.toString());
    if (matcher != null) {
      if (match) {
        if (matcher.matches()) {
          processMatch(doc, matcher);
        }
      } else {//
        while (matcher.find()) {
          processMatch(doc, matcher);
          reporter.progress();//do we really even need this?
        }
      }
    }
    // Adding the file path where this record was taken
    FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
    String originalLogFilePath = fileSplit.getPath().toUri().getPath();
    doc.addField(FIELD_PATH, originalLogFilePath);
    String docId = originalLogFilePath + "-" + doc.getId();
    doc.setId(docId);
    return new LWDocument[] {doc};
  }
  return null;
}
 
Example #21
Source Project: hbase   Author: apache   File: TestTableMapReduce.java    License: Apache License 2.0
/**
 * Pass the key, and reversed value to reduce
 */
public void map(ImmutableBytesWritable key, Result value,
  OutputCollector<ImmutableBytesWritable, Put> output,
  Reporter reporter)
throws IOException {
  output.collect(key, TestTableMapReduceBase.map(key, value));
}
 
Example #22
Source Project: RDFS   Author: iVCE   File: RegexMapper.java    License: Apache License 2.0
public void map(K key, Text value,
                OutputCollector<Text, LongWritable> output,
                Reporter reporter)
  throws IOException {
  String text = value.toString();
  Matcher matcher = pattern.matcher(text);
  while (matcher.find()) {
    output.collect(new Text(matcher.group(group)), new LongWritable(1));
  }
}
 
Example #23
Source Project: tez   Author: apache   File: TezGroupedSplitsInputFormat.java    License: Apache License 2.0
public TezGroupedSplitsRecordReader(TezGroupedSplit split, JobConf job,
    Reporter reporter) throws IOException {
  this.groupedSplit = split;
  this.job = job;
  this.reporter = reporter;
  initNextRecordReader();
}
 
Example #24
Source Project: anthelion   Author: YahooArchive   File: CrawlDbReader.java    License: Apache License 2.0
public void map(Text key, CrawlDatum value, OutputCollector<Text, LongWritable> output, Reporter reporter)
        throws IOException {
  output.collect(new Text("T"), COUNT_1);
  output.collect(new Text("status " + value.getStatus()), COUNT_1);
  output.collect(new Text("retry " + value.getRetriesSinceFetch()), COUNT_1);
  output.collect(new Text("s"), new LongWritable((long) (value.getScore() * 1000.0)));
  if(sort){
    URL u = new URL(key.toString());
    String host = u.getHost();
    output.collect(new Text("status " + value.getStatus() + " " + host), COUNT_1);
  }
}
 
Example #25
Source Project: hadoop-gpu   Author: koichi626   File: DBInputFormat.java    License: Apache License 2.0
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
public RecordReader<LongWritable, T> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {

  Class inputClass = dbConf.getInputClass();
  try {
    return new DBRecordReader((DBInputSplit) split, inputClass, job);
  }
  catch (SQLException ex) {
    throw new IOException(ex.getMessage());
  }
}
 
Example #26
Source Project: Flink-CEPplus   Author: ljygz   File: HadoopReduceFunctionITCase.java    License: Apache License 2.0
@Override
public void reduce(IntWritable k, Iterator<Text> vs, OutputCollector<IntWritable, IntWritable> out, Reporter r)
		throws IOException {
	int commentCnt = 0;
	while (vs.hasNext()) {
		String v = vs.next().toString();
		if (v.startsWith("Comment")) {
			commentCnt++;
		}
	}
	out.collect(k, new IntWritable(commentCnt));
}
 
Example #27
Source Project: Flink-CEPplus   Author: ljygz   File: HadoopMapFunctionITCase.java    License: Apache License 2.0
@Override
public void map(final IntWritable k, final Text v,
		final OutputCollector<IntWritable, Text> out, final Reporter r) throws IOException {
	if (v.toString().contains("bananas")) {
		out.collect(k, v);
	}
}
 
Example #28
Source Project: Flink-CEPplus   Author: ljygz   File: HadoopMapFunctionITCase.java    License: Apache License 2.0
@Override
public void map(IntWritable k, Text v, OutputCollector<IntWritable, Text> out, Reporter r)
		throws IOException {
	if (v.toString().startsWith(filterPrefix)) {
		out.collect(k, v);
	}
}
 
Example #29
Source Project: hadoop   Author: naver   File: DBInputFormat.java    License: Apache License 2.0
/** {@inheritDoc} */
public RecordReader<LongWritable, T> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {

  // wrap the DBRR in a shim class to deal with API differences.
  return new DBRecordReaderWrapper<T>(
      (org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T>) 
      createDBRecordReader(
        (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit) split, job));
}
 
Example #30
Source Project: RDFS   Author: iVCE   File: PipesReducer.java    License: Apache License 2.0
/**
 * Handle the end of the input by closing down the application.
 */
public void close() throws IOException {
  // if we haven't started the application, we have nothing to do
  if (isOk) {
    OutputCollector<K3, V3> nullCollector = new OutputCollector<K3, V3>() {
      public void collect(K3 key, 
                          V3 value) throws IOException {
        // NULL
      }
    };
    startApplication(nullCollector, Reporter.NULL);
  }
  try {
    if (isOk) {
      application.getDownlink().endOfInput();
    } else {
      // send the abort to the application and let it clean up
      application.getDownlink().abort();
    }
    LOG.info("waiting for finish");
    application.waitForFinish();
    LOG.info("got done");
  } catch (Throwable t) {
    application.abort(t);
  } finally {
    application.cleanup();
  }
}
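 
Several of the examples above (for instance #8, #16, #17 and #30) pass Reporter.NULL when a RecordReader or RecordWriter is driven outside of a running task and no progress reporting is needed. The following is a minimal sketch of that pattern for reading a text file with the old mapred API; the class name and input path are hypothetical:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class ReporterNullExample {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf();
    // Hypothetical input path; replace with a real file.
    FileInputFormat.setInputPaths(conf, new Path("/tmp/input.txt"));

    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(conf);

    // Reporter.NULL is a no-op Reporter for use outside of a task context.
    for (InputSplit split : inputFormat.getSplits(conf, 1)) {
      RecordReader<LongWritable, Text> reader =
          inputFormat.getRecordReader(split, conf, Reporter.NULL);
      LongWritable key = reader.createKey();
      Text value = reader.createValue();
      while (reader.next(key, value)) {
        System.out.println(value);
      }
      reader.close();
    }
  }
}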