Java Code Examples for org.apache.hadoop.mapreduce.TaskAttemptContext#setStatus()

The following examples show how to use org.apache.hadoop.mapreduce.TaskAttemptContext#setStatus(). They are drawn from a number of open-source projects; each example lists its source file, originating project, and license.
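Most of the examples below share one idiom: an InputFormat's createRecordReader() (or an OutputFormat's RecordWriter.close()) calls context.setStatus() with a short human-readable string, typically the split's toString(), so the task attempt's current activity is visible in the job UI. The following minimal sketch illustrates that idiom; MyInputFormat is a hypothetical placeholder name, not a class from any of the projects listed.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

public class MyInputFormat extends FileInputFormat<LongWritable, Text> {

  @Override
  public RecordReader<LongWritable, Text> createRecordReader(
      InputSplit split, TaskAttemptContext context) throws IOException {
    // Publish the split description as this task attempt's status string.
    context.setStatus(split.toString());
    // Delegate the actual record reading to the stock line reader.
    return new LineRecordReader();
  }
}
 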
Example 1
Source File: PigAvroInputFormat.java    From Cubert with Apache License 2.0
/**
 * Create and return an avro record reader.
 * It uses the input schema passed in to the
 * constructor.
 */
@Override
public RecordReader<NullWritable, Writable>
createRecordReader(InputSplit split, TaskAttemptContext context)
throws IOException, InterruptedException {
    context.setStatus(split.toString());
    return new PigAvroRecordReader(context, (FileSplit) split, readerSchema,
            ignoreBadFiles, schemaToMergedSchemaMap, useMultipleSchemas);
}
 
Example 2
Source File: SolrRecordWriter.java    From hbase-indexer with Apache License 2.0
@Override
  public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    if (context != null) {
      heartBeater.setProgress(context);
    }
    try {
      heartBeater.needHeartBeat();
      if (batch.size() > 0) {
        batchWriter.queueBatch(batch);
        numDocsWritten += batch.size();
        batch.clear();
      }
      LOG.info("docsWritten: {}", numDocsWritten);
      batchWriter.close(context);
//      if (outputZipFile) {
//        context.setStatus("Writing Zip");
//        packZipFile(); // Written to the perm location
//      } else {
//        context.setStatus("Copying Index");
//        fs.completeLocalOutput(perm, temp); // copy to dfs
//      }
    } catch (Exception e) {
      if (e instanceof IOException) {
        throw (IOException) e;
      }
      throw new IOException(e);
    } finally {
      heartBeater.cancelHeartBeat();
      heartBeater.close();
//      File tempFile = new File(temp.toString());
//      if (tempFile.exists()) {
//        FileUtils.forceDelete(new File(temp.toString()));
//      }
    }

    context.setStatus("Done");
  }
 
Example 3
Source File: PigAvroInputFormat.java    From spork with Apache License 2.0
/**
 * Create and return an avro record reader.
 * It uses the input schema passed in to the
 * constructor.
 */
@Override
public RecordReader<NullWritable, Writable>
createRecordReader(InputSplit split, TaskAttemptContext context)
throws IOException, InterruptedException {
    context.setStatus(split.toString());
    return new PigAvroRecordReader(context, (FileSplit) split, readerSchema,
            ignoreBadFiles, schemaToMergedSchemaMap, useMultipleSchemas);
}
 
Example 4
Source File: AvroStorage.java    From spork with Apache License 2.0
/**
 * @see org.apache.pig.LoadFunc#getInputFormat()
 */
@Override
public InputFormat<NullWritable, GenericData.Record> getInputFormat()
    throws IOException {

  return new org.apache.pig.backend.hadoop.executionengine.mapReduceLayer
      .PigFileInputFormat<NullWritable, GenericData.Record>() {

    @Override
    public RecordReader<NullWritable, GenericData.Record>
      createRecordReader(final InputSplit is, final TaskAttemptContext tc)
        throws IOException, InterruptedException {
      Schema s = getInputAvroSchema();
      RecordReader<NullWritable, GenericData.Record> rr = null;
      if (s.getType() == Type.ARRAY) {
        rr = new AvroArrayReader(s);
      } else {
        rr = new AvroRecordReader(s);
      }
      // Initialize the reader for this split; it must stay open for the framework to use.
      rr.initialize(is, tc);
      tc.setStatus(is.toString());
      return rr;
    }
  };

}
 
Example 5
Source File: SolrRecordWriter.java    From examples with Apache License 2.0
@Override
  public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    if (context != null) {
      heartBeater.setProgress(context);
    }
    try {
      heartBeater.needHeartBeat();
      if (batch.size() > 0) {
        batchWriter.queueBatch(batch);
        numDocsWritten += batch.size();
        batch.clear();
      }
      LOG.info("docsWritten: {}", numDocsWritten);
      batchWriter.close(context);
//      if (outputZipFile) {
//        context.setStatus("Writing Zip");
//        packZipFile(); // Written to the perm location
//      } else {
//        context.setStatus("Copying Index");
//        fs.completeLocalOutput(perm, temp); // copy to dfs
//      }
    } catch (Exception e) {
      if (e instanceof IOException) {
        throw (IOException) e;
      }
      throw new IOException(e);
    } finally {
      heartBeater.cancelHeartBeat();
      heartBeater.close();
//      File tempFile = new File(temp.toString());
//      if (tempFile.exists()) {
//        FileUtils.forceDelete(new File(temp.toString()));
//      }
    }

    context.setStatus("Done");
  }
 
Example 6
Source File: BatchWriter.java    From examples with Apache License 2.0
public synchronized void close(TaskAttemptContext context)
    throws InterruptedException, SolrServerException, IOException {

  if (batchPool != null) {
    context.setStatus("Waiting for batches to complete");
    batchPool.shutdown();

    while (!batchPool.isTerminated()) {
      LOG.info(String.format(Locale.ENGLISH, 
          "Waiting for %d items and %d threads to finish executing", batchPool
              .getQueue().size(), batchPool.getActiveCount()));
      batchPool.awaitTermination(5, TimeUnit.SECONDS);
    }
  }
  context.setStatus("Committing Solr Phase 1");
  solr.commit(true, false);
  context.setStatus("Optimizing Solr");
  int maxSegments = context.getConfiguration().getInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, 1);
  LOG.info("Optimizing Solr: forcing merge down to {} segments", maxSegments);
  long start = System.nanoTime();
  solr.optimize(true, false, maxSegments);
  context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_REDUCER_MERGE_TIME.toString()).increment(System.nanoTime() - start);
  float secs = (System.nanoTime() - start) / 1e9f; // nanoseconds to seconds
  LOG.info("Optimizing Solr: done forcing merge down to {} segments in {} secs", maxSegments, secs);
  context.setStatus("Committing Solr Phase 2");
  solr.commit(true, false);
  context.setStatus("Shutting down Solr");
  solr.shutdown();
}
 
Example 7
Source File: QseqInputFormat.java    From Hadoop-BAM with MIT License
public RecordReader<Text, SequencedFragment> createRecordReader(
                                        InputSplit genericSplit,
                                        TaskAttemptContext context) throws IOException, InterruptedException
{
	context.setStatus(genericSplit.toString());
	return new QseqRecordReader(context.getConfiguration(), (FileSplit)genericSplit); // cast as per example in TextInputFormat
}
 
Example 8
Source File: FastaInputFormat.java    From Hadoop-BAM with MIT License
public RecordReader<Text, ReferenceFragment> createRecordReader(
                                        InputSplit genericSplit,
                                        TaskAttemptContext context) throws IOException, InterruptedException
{
	context.setStatus(genericSplit.toString());
	return new FastaRecordReader(context.getConfiguration(), (FileSplit)genericSplit); // cast as per example in TextInputFormat
}
 
Example 9
Source File: FastqInputFormat.java    From Hadoop-BAM with MIT License
public RecordReader<Text, SequencedFragment> createRecordReader(
                                        InputSplit genericSplit,
                                        TaskAttemptContext context) throws IOException, InterruptedException
{
	context.setStatus(genericSplit.toString());
	return new FastqRecordReader(context.getConfiguration(), (FileSplit)genericSplit); // cast as per example in TextInputFormat
}
 
Example 10
Source File: AvroInputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public RecordReader<AvroWrapper<T>, NullWritable> createRecordReader(
    InputSplit split, TaskAttemptContext context) throws IOException,
    InterruptedException {
  context.setStatus(split.toString());
  return new AvroRecordReader<T>();
}
 
Example 11
Source File: MultiLineInputFormat.java    From dkpro-c4corpus with Apache License 2.0
@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit genericSplit,
        TaskAttemptContext context)
{
    context.setStatus(genericSplit.toString());
    return new MultiLineRecordReader();
}
 
Example 12
Source File: SequenceFileAsTextInputFormat.java    From big-c with Apache License 2.0
public RecordReader<Text, Text> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException {
  context.setStatus(split.toString());
  return new SequenceFileAsTextRecordReader();
}
 
Example 13
Source File: SequenceFileInputFilter.java    From big-c with Apache License 2.0
/** Create a record reader for the given split
 * @param split file split
 * @param context the task-attempt context
 * @return RecordReader
 */
public RecordReader<K, V> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException {
  context.setStatus(split.toString());
  return new FilterRecordReader<K, V>(context.getConfiguration());
}
 
Example 14
Source File: KeyValueTextInputFormat.java    From big-c with Apache License 2.0
public RecordReader<Text, Text> createRecordReader(InputSplit genericSplit,
    TaskAttemptContext context) throws IOException {
  
  context.setStatus(genericSplit.toString());
  return new KeyValueLineRecordReader(context.getConfiguration());
}
 
Example 15
Source File: TreeMergeOutputFormat.java    From examples with Apache License 2.0
@Override
    public void close(TaskAttemptContext context) throws IOException {
      LOG.debug("Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards);
      writeShardNumberFile(context);      
      heartBeater.needHeartBeat();
      try {
        Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration());
        
        // TODO: shouldn't we pull the Version from the solrconfig.xml?
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null)
            .setOpenMode(OpenMode.CREATE).setUseCompoundFile(false)
            //.setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
            //.setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?
            ;
          
        if (LOG.isDebugEnabled()) {
          writerConfig.setInfoStream(System.out);
        }
//        writerConfig.setRAMBufferSizeMB(100); // improve performance
//        writerConfig.setMaxThreadStates(1);
        
        // disable compound file to improve performance
        // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
        // also see defaults in SolrIndexConfig
        MergePolicy mergePolicy = writerConfig.getMergePolicy();
        LOG.debug("mergePolicy was: {}", mergePolicy);
        if (mergePolicy instanceof TieredMergePolicy) {
          ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);          
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);       
//          ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
        } else if (mergePolicy instanceof LogMergePolicy) {
          ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
        }
        LOG.info("Using mergePolicy: {}", mergePolicy);
        
        IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);
        
        Directory[] indexes = new Directory[shards.size()];
        for (int i = 0; i < shards.size(); i++) {
          indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration());
        }

        context.setStatus("Logically merging " + shards.size() + " shards into one shard");
        LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
        long start = System.nanoTime();
        
        writer.addIndexes(indexes); 
        // TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename) 
        // This can improve performance and turns this phase into a true "logical" merge, completing in constant time.
        // See https://issues.apache.org/jira/browse/LUCENE-4746
        
        if (LOG.isDebugEnabled()) {
          context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString()).increment(System.nanoTime() - start);
        }
        float secs = (System.nanoTime() - start) / 1e9f; // nanoseconds to seconds
        LOG.info("Logical merge took {} secs", secs);        
        int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
        context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
        LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
        start = System.nanoTime();
        if (maxSegments < Integer.MAX_VALUE) {
          writer.forceMerge(maxSegments); 
          // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data 
          // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
        }
        if (LOG.isDebugEnabled()) {
          context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString()).increment(System.nanoTime() - start);
        }
        secs = (System.nanoTime() - start) / 1e9f; // nanoseconds to seconds
        LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs);
        
        start = System.nanoTime();
        LOG.info("Optimizing Solr: Closing index writer");
        writer.close();
        secs = (System.nanoTime() - start) / 1e9f; // nanoseconds to seconds
        LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs);
        context.setStatus("Done");
      } finally {
        heartBeater.cancelHeartBeat();
        heartBeater.close();
      }
    }
 
Example 16
Source File: NLineInputFormat.java    From big-c with Apache License 2.0
public RecordReader<LongWritable, Text> createRecordReader(
    InputSplit genericSplit, TaskAttemptContext context) 
    throws IOException {
  context.setStatus(genericSplit.toString());
  return new LineRecordReader();
}
 
Example 17
Source File: SequenceFileAsTextInputFormat.java    From hadoop with Apache License 2.0
public RecordReader<Text, Text> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException {
  context.setStatus(split.toString());
  return new SequenceFileAsTextRecordReader();
}
 
Example 18
Source File: SequenceFileInputFilter.java    From hadoop with Apache License 2.0
/** Create a record reader for the given split
 * @param split file split
 * @param context the task-attempt context
 * @return RecordReader
 */
public RecordReader<K, V> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException {
  context.setStatus(split.toString());
  return new FilterRecordReader<K, V>(context.getConfiguration());
}
 
Example 19
Source File: KeyValueTextInputFormat.java    From hadoop with Apache License 2.0
public RecordReader<Text, Text> createRecordReader(InputSplit genericSplit,
    TaskAttemptContext context) throws IOException {
  
  context.setStatus(genericSplit.toString());
  return new KeyValueLineRecordReader(context.getConfiguration());
}
 
Example 20
Source File: NLineInputFormat.java    From hadoop with Apache License 2.0
public RecordReader<LongWritable, Text> createRecordReader(
    InputSplit genericSplit, TaskAttemptContext context) 
    throws IOException {
  context.setStatus(genericSplit.toString());
  return new LineRecordReader();
}