Java Code Examples for org.apache.hadoop.mapred.Reporter#getInputSplit()

The following examples show how to use org.apache.hadoop.mapred.Reporter#getInputSplit(). You can vote up the ones you like or vote down the ones you don't like, and you can go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DelegatingMapper.java    From hadoop with Apache License 2.0 5 votes vote down vote up
@SuppressWarnings("unchecked")
public void map(K1 key, V1 value, OutputCollector<K2, V2> outputCollector,
    Reporter reporter) throws IOException {

  // Lazily resolve the delegate on the first record: the split handed to
  // this task is a TaggedInputSplit that carries the mapper class to use.
  if (mapper == null) {
    TaggedInputSplit taggedSplit = (TaggedInputSplit) reporter.getInputSplit();
    Class<?> delegateClass = taggedSplit.getMapperClass();
    mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(delegateClass, conf);
  }
  // Forward the record to the resolved delegate mapper.
  mapper.map(key, value, outputCollector, reporter);
}
 
Example 2
Source File: DelegatingMapper.java    From big-c with Apache License 2.0 5 votes vote down vote up
@SuppressWarnings("unchecked")
public void map(K1 key, V1 value, OutputCollector<K2, V2> outputCollector,
    Reporter reporter) throws IOException {

  // First record: instantiate the real mapper named by the TaggedInputSplit.
  if (mapper == null) {
    TaggedInputSplit split = (TaggedInputSplit) reporter.getInputSplit();
    mapper =
        (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(split.getMapperClass(), conf);
  }
  // Delegate the actual work to the wrapped mapper.
  mapper.map(key, value, outputCollector, reporter);
}
 
Example 3
Source File: RegexIngestMapper.java    From hadoop-solr with Apache License 2.0 5 votes vote down vote up
@Override
public LWDocument[] toDocuments(Writable key, Writable value, Reporter reporter,
    Configuration conf) throws IOException {
  if (key != null && value != null) {
    LWDocument doc = createDocument(key.toString() + "-" + System.currentTimeMillis(), null);
    // Pattern.matcher() never returns null (see java.util.regex.Pattern javadoc),
    // so no null check is needed on the matcher itself.
    Matcher matcher = regex.matcher(value.toString());
    if (match) {
      // "match" mode: the whole value must match the pattern.
      if (matcher.matches()) {
        processMatch(doc, matcher);
      }
    } else {
      // "find" mode: process every occurrence of the pattern in the value.
      while (matcher.find()) {
        processMatch(doc, matcher);
        reporter.progress();//do we really even need this?
      }
    }
    // Adding the file path where this record was taken
    FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
    String originalLogFilePath = fileSplit.getPath().toUri().getPath();
    doc.addField(FIELD_PATH, originalLogFilePath);
    // Prefix the document id with the source path so ids are unique per file.
    String docId = originalLogFilePath + "-" + doc.getId();
    doc.setId(docId);
    return new LWDocument[] {doc};
  }
  // Null key or value: nothing to index for this record.
  return null;
}
 
Example 4
Source File: LineIndexer.java    From attic-apex-malhar with Apache License 2.0 5 votes vote down vote up
public void map(LongWritable key, Text val,
    OutputCollector<Text, Text> output, Reporter reporter) throws IOException
{
  // The posting "location" is the name of the file this record came from.
  FileSplit split = (FileSplit) reporter.getInputSplit();
  location.set(split.getPath().getName());

  // Emit (token, filename) for every whitespace-delimited token, lowercased.
  StringTokenizer tokens = new StringTokenizer(val.toString().toLowerCase());
  while (tokens.hasMoreTokens()) {
    word.set(tokens.nextToken());
    output.collect(word, location);
  }
}
 
Example 5
Source File: DelegatingMapper.java    From RDFS with Apache License 2.0 5 votes vote down vote up
@SuppressWarnings("unchecked")
public void map(K1 key, V1 value, OutputCollector<K2, V2> outputCollector,
    Reporter reporter) throws IOException {

  // Instantiate the delegate lazily; the TaggedInputSplit for this task
  // records which Mapper implementation should handle its records.
  if (mapper == null) {
    TaggedInputSplit tagged = (TaggedInputSplit) reporter.getInputSplit();
    mapper = (Mapper<K1, V1, K2, V2>)
        ReflectionUtils.newInstance(tagged.getMapperClass(), conf);
  }
  // Hand the record off to the delegate.
  mapper.map(key, value, outputCollector, reporter);
}
 
Example 6
Source File: InvertedIndex.java    From hadoop-book with Apache License 2.0 5 votes vote down vote up
public void map(LongWritable key, Text val,
        OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    // Use the source file's name as the value emitted for each token.
    String fileName = ((FileSplit) reporter.getInputSplit()).getPath().getName();
    location.set(fileName);

    // Tokenize the lowercased line and emit one (token, filename) pair
    // per whitespace-delimited token.
    for (StringTokenizer itr = new StringTokenizer(val.toString().toLowerCase());
            itr.hasMoreTokens(); ) {
        word.set(itr.nextToken());
        output.collect(word, location);
    }
}
 
Example 7
Source File: DelegatingMapper.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
@SuppressWarnings("unchecked")
public void map(K1 key, V1 value, OutputCollector<K2, V2> outputCollector,
    Reporter reporter) throws IOException {

  // On the first call, look up the concrete mapper class stored in the
  // TaggedInputSplit and build an instance via ReflectionUtils.
  if (mapper == null) {
    TaggedInputSplit inputSplit = (TaggedInputSplit) reporter.getInputSplit();
    Class<?> clazz = inputSplit.getMapperClass();
    mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(clazz, conf);
  }
  // All records are processed by the delegate mapper.
  mapper.map(key, value, outputCollector, reporter);
}
 
Example 8
Source File: GrokIngestMapper.java    From hadoop-solr with Apache License 2.0 4 votes vote down vote up
@Override
protected LWDocument[] toDocuments(LongWritable key, Text value, Reporter reporter,
                                   Configuration conf) throws IOException {

  // Pass the raw log line plus the configured Grok filters into the Ruby
  // matcher script; the params are removed from the runtime after use.
  Map<String, Object> params = new HashMap<String, Object>();
  params.put(LOG_RUBY_PARAM, value.toString());
  params.put(FILTERS_ARRAY_RUBY_PARAM, filters);

  List<String> toRemoveList = new ArrayList<String>();
  toRemoveList.add(LOG_RUBY_PARAM);
  toRemoveList.add(FILTERS_ARRAY_RUBY_PARAM);
  Object response = executeScript(MATCHER_RUBY_CLASS, params, toRemoveList);

  try {
    // Check for null BEFORE casting/dereferencing, so a no-match response
    // cleanly returns null instead of relying on cast-of-null semantics.
    if (response != null) {
      RubyHash hash = (RubyHash) response;
      Set<String> keys = hash.keySet();
      LWDocument document = createDocument();
      // Copy every field the Grok match produced into the document.
      for (String currentKey : keys) {
        document.addField(currentKey, hash.get(currentKey));
      }

      // Adding the file where this log was taken
      FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
      String originalLogFilePath = fileSplit.getPath().toUri().getPath();
      document.addField(PATH_FIELD_NAME, originalLogFilePath);

      // Adding offset value
      document.addField(BYTE_OFFSET_FIELD_NAME, key.toString());

      // Set ID: path + byte offset + timestamp keeps ids unique per record.
      document.setId(originalLogFilePath + "-" + key.toString() + "-" + System.currentTimeMillis());

      return new LWDocument[] {document};
    } else {
      return null;
    }
  } catch (Exception e) {
    // Log the full stack trace and preserve the cause so callers can see
    // what actually failed inside the Ruby script, not just a bare message.
    log.error("Error executing ruby script", e);
    throw new RuntimeException("Error executing ruby script", e);
  }
}