org.apache.hadoop.mapreduce.TaskAttemptContext Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.TaskAttemptContext. Each example is taken from the open-source project named in its Source File line.
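Most of the examples follow the same pattern: a RecordReader, RecordWriter, OutputFormat, or OutputCommitter receives a TaskAttemptContext and uses it chiefly to read the job Configuration and to identify the current task attempt. As a minimal sketch of that pattern (the class name SingleLineRecordReader and the property example.trim.values are hypothetical, not taken from any project below), a custom RecordReader might use the context like this:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

/** Hypothetical reader that delegates to LineRecordReader and reads one custom setting. */
public class SingleLineRecordReader extends RecordReader<LongWritable, Text> {

    private final LineRecordReader delegate = new LineRecordReader();
    private boolean trimValues;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The context carries the job Configuration for this task attempt.
        Configuration conf = context.getConfiguration();
        trimValues = conf.getBoolean("example.trim.values", false); // hypothetical property
        if (!(split instanceof FileSplit)) {
            throw new IOException("Expected a FileSplit but got " + split.getClass().getName());
        }
        delegate.initialize(split, context);
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        return delegate.nextKeyValue();
    }

    @Override
    public LongWritable getCurrentKey() throws IOException, InterruptedException {
        return delegate.getCurrentKey();
    }

    @Override
    public Text getCurrentValue() throws IOException, InterruptedException {
        Text value = delegate.getCurrentValue();
        return trimValues ? new Text(value.toString().trim()) : value;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        return delegate.getProgress();
    }

    @Override
    public void close() throws IOException {
        delegate.close();
    }
}

Outside a running job, several of the examples below (e.g., #8, #13, #17, and #20) construct a TaskAttemptContext directly with new TaskAttemptContextImpl(conf, new TaskAttemptID()) when one is not supplied by the framework.
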
Example #1
Source File: WikipediaRecordReader.java    From datawave with Apache License 2.0
private void initializeSuperClass(InputSplit split, TaskAttemptContext context) throws IOException {
    super.initialize(split, context);
    if (split instanceof FileSplit) {
        FileSplit fs = (FileSplit) split;
        Path p = fs.getPath();
        rawFileName = p.getName();
        
        if (log.isDebugEnabled()) {
            log.debug("FileSplit Info: ");
            log.debug("Start: " + fs.getStart());
            log.debug("Length: " + fs.getLength());
            log.debug("Locations: " + Arrays.toString(fs.getLocations()));
            log.debug("Path: " + fs.getPath());
        }
    } else {
        throw new IOException("Input Split unhandled.");
    }
}
 
Example #2
Source File: MapReduceParsedInputFormat.java    From incubator-retired-mrql with Apache License 2.0
public ParsedRecordReader ( FileSplit split,
                            TaskAttemptContext context,
                            Class<? extends Parser> parser_class,
                            Trees args ) throws IOException {
    Configuration conf = context.getConfiguration();
    start = split.getStart();
    end = start + split.getLength();
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    fsin = fs.open(split.getPath());
    try {
        parser = parser_class.newInstance();
    } catch (Exception ex) {
        throw new Error("Unrecognized parser: " + parser_class);
    }
    parser.initialize(args);
    parser.open(fsin,start,end);
    result = null;
}
 
Example #3
Source File: DBOutputFormat.java    From hadoop with Apache License 2.0
/** {@inheritDoc} */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) 
    throws IOException {
  DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
  String tableName = dbConf.getOutputTableName();
  String[] fieldNames = dbConf.getOutputFieldNames();
  
  if(fieldNames == null) {
    fieldNames = new String[dbConf.getOutputFieldCount()];
  }
  
  try {
    Connection connection = dbConf.getConnection();
    PreparedStatement statement = null;

    statement = connection.prepareStatement(
                  constructQuery(tableName, fieldNames));
    return new DBRecordWriter(connection, statement);
  } catch (Exception ex) {
    throw new IOException(ex.getMessage());
  }
}
 
Example #4
Source File: UpdateOutputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
public UpdateRecordWriter(TaskAttemptContext context)
    throws ClassNotFoundException, SQLException {
  super(context);

  Configuration conf = getConf();

  DBConfiguration dbConf = new DBConfiguration(conf);
  this.tableName = dbConf.getOutputTableName();
  this.columnNames = dbConf.getOutputFieldNames();
  String updateKeyColumns =
      conf.get(ExportJobBase.SQOOP_EXPORT_UPDATE_COL_KEY);

  Set<String> updateKeys = new LinkedHashSet<String>();
  StringTokenizer stok = new StringTokenizer(updateKeyColumns, ",");
  while (stok.hasMoreTokens()) {
    String nextUpdateKey = stok.nextToken().trim();
    if (nextUpdateKey.length() > 0) {
      updateKeys.add(nextUpdateKey);
    } else {
      throw new RuntimeException("Invalid update key column value specified"
          + ": '" + updateKeyColumns + "'");
    }
  }

  updateCols = updateKeys.toArray(new String[updateKeys.size()]);
}
 
Example #5
Source File: ArchiveWriter.java    From marklogic-contentpump with Apache License 2.0
@Override
public void close(TaskAttemptContext arg0) throws IOException,
    InterruptedException {
    if (txtArchive != null) {
        txtArchive.close();
    }
    if (xmlArchive != null) {
        xmlArchive.close();
    }
    if (jsonArchive != null) {
        jsonArchive.close();
    }
    if (binaryArchive != null) {
        binaryArchive.close();
    }
}
 
Example #6
Source File: MrsPyramidRecordReader.java    From mrgeo with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
    InterruptedException
{
  if (split instanceof MrsPyramidInputSplit)
  {
    MrsPyramidInputSplit fsplit = (MrsPyramidInputSplit) split;

    ifContext = ImageInputFormatContext.load(context.getConfiguration());
    if (ifContext.getBounds() != null)
    {
      inputBounds = ifContext.getBounds();
    }
    scannedInputReader = createRecordReader(fsplit, context);
    tilesize = ifContext.getTileSize();
    zoomLevel = ifContext.getZoomLevel();
  }
  else
  {
    throw new IOException("Got a split of type " + split.getClass().getCanonicalName() +
        " but expected one of type " + MrsPyramidInputSplit.class.getCanonicalName());
  }
}
 
Example #7
Source File: BulkInputFormat.java    From datawave with Apache License 2.0
@Override
public RecordReader<Key,Value> createRecordReader(InputSplit split, TaskAttemptContext context) {
    
    return new RecordReaderBase<Key,Value>() {
        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (scannerIterator.hasNext()) {
                ++numKeysRead;
                Entry<Key,Value> entry = scannerIterator.next();
                currentK = currentKey = entry.getKey();
                currentV = currentValue = entry.getValue();
                if (log.isTraceEnabled())
                    log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
                return true;
            } else if (numKeysRead < 0) {
                numKeysRead = 0;
            }
            return false;
        }
    };
}
 
Example #8
Source File: TestFileOutputCommitter.java    From big-c with Apache License 2.0
public void testInvalidVersionNumber() throws IOException {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 3);
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  try {
    new FileOutputCommitter(outDir, tContext);
    fail("should've thrown an exception!");
  } catch (IOException e) {
    //test passed
  }
}
 
Example #9
Source File: MultiTableOutputFormat.java    From hbase with Apache License 2.0
@Override
public void close(TaskAttemptContext context) throws IOException {
  for (BufferedMutator mutator : mutatorMap.values()) {
    mutator.close();
  }
  if (connection != null) {
    connection.close();
  }
}
 
Example #10
Source File: TezGroupedSplitsInputFormat.java    From incubator-tez with Apache License 2.0
@Override
public RecordReader<K, V> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
  TezGroupedSplit groupedSplit = (TezGroupedSplit) split;
  initInputFormatFromSplit(groupedSplit);
  return new TezGroupedSplitsRecordReader(groupedSplit, context);
}
 
Example #11
Source File: MapTask.java    From RDFS with Apache License 2.0
@Override
public void close(TaskAttemptContext context
                  ) throws IOException,InterruptedException {
  try {
    collector.flush();
  } catch (ClassNotFoundException cnf) {
    throw new IOException("can't find class ", cnf);
  }
  collector.close();
}
 
Example #12
Source File: DatasetKeyInputFormat.java    From kite with Apache License 2.0
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR",
    justification="Delegate set by setConf")
public RecordReader<E, Void> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  Configuration conf = Hadoop.TaskAttemptContext.getConfiguration.invoke(taskAttemptContext);
  DefaultConfiguration.init(conf);
  return delegate.createRecordReader(inputSplit, taskAttemptContext);
}
 
Example #13
Source File: TestLineRecordReader.java    From hadoop with Apache License 2.0
@Test
public void testStripBOM() throws IOException {
  // the test data contains a BOM at the start of the file
  // confirm the BOM is skipped by LineRecordReader
  String UTF8_BOM = "\uFEFF";
  URL testFileUrl = getClass().getClassLoader().getResource("testBOM.txt");
  assertNotNull("Cannot find testBOM.txt", testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);

  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // read the data and check whether BOM is skipped
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
      (String[])null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);
  int numRecords = 0;
  boolean firstLine = true;
  boolean skipBOM = true;
  while (reader.nextKeyValue()) {
    if (firstLine) {
      firstLine = false;
      if (reader.getCurrentValue().toString().startsWith(UTF8_BOM)) {
        skipBOM = false;
      }
    }
    ++numRecords;
  }
  reader.close();

  assertTrue("BOM is not skipped", skipBOM);
}
 
Example #14
Source File: PgVectorRecordReader.java    From mrgeo with Apache License 2.0
@SuppressFBWarnings(value = {"SQL_INJECTION_JDBC", "SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING"}, justification = "User supplied queries are a requirement")
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException
{
  if (!(split instanceof PgInputSplit)) {
    throw new IOException("Expected an instance of PgInputSplit");
  }
  offset = ((PgInputSplit) split).getOffset();
  limit = ((PgInputSplit) split).getLimit();
  currIndex = offset - 1;
  try
  {
    conn = PgVectorDataProvider.getDbConnection(dbSettings);
    // If the offset is < 0, then there is only one partition, so no need
    // for a limit query.
    String fullQuery = (offset < 0) ? dbSettings.getQuery() : (dbSettings.getQuery() + " OFFSET " + offset + " LIMIT " + limit);
    stmt = conn.prepareStatement(fullQuery,
            ResultSet.TYPE_FORWARD_ONLY,
            ResultSet.CONCUR_READ_ONLY);
    rs = ((PreparedStatement) stmt).executeQuery();
    ResultSetMetaData metadata = rs.getMetaData();
    columnCount = metadata.getColumnCount();
    columnLabels = new String[columnCount];
    for (int c=1; c <= columnCount; c++) {
      columnLabels[c-1] = metadata.getColumnLabel(c);
    }
  }
  catch (SQLException e)
  {
    throw new IOException("Could not open database.", e);
  }
}
 
Example #15
Source File: ErrorShardedDataTypeHandler.java    From datawave with Apache License 2.0
@Override
public void setup(TaskAttemptContext context) {
    IngestConfiguration ingestConfiguration = IngestConfigurationFactory.getIngestConfiguration();
    markingsHelper = ingestConfiguration.getMarkingsHelper(context.getConfiguration(), TypeRegistry.getType(TypeRegistry.ERROR_PREFIX));
    
    super.setup(context);
    
    this.errorHelper = (ErrorShardedIngestHelper) (TypeRegistry.getType("error").getIngestHelper(context.getConfiguration()));
    this.errorHelper.setDelegateHelper(this.helper);
    this.helper = this.errorHelper;
    
    this.conf = context.getConfiguration();
    
    this.setupDictionaryCache(conf.getInt(ERROR_PROP_PREFIX + SHARD_DICTIONARY_CACHE_ENTRIES, ShardedDataTypeHandler.SHARD_DINDEX_CACHE_DEFAULT_SIZE));
    
    setShardTableName(new Text(ConfigurationHelper.isNull(conf, ERROR_PROP_PREFIX + SHARD_TNAME, String.class)));
    String tableName = conf.get(ERROR_PROP_PREFIX + SHARD_GIDX_TNAME);
    setShardIndexTableName(tableName == null ? null : new Text(tableName));
    tableName = conf.get(ERROR_PROP_PREFIX + SHARD_GRIDX_TNAME);
    setShardReverseIndexTableName(tableName == null ? null : new Text(tableName));
    tableName = conf.get(ERROR_PROP_PREFIX + METADATA_TABLE_NAME);
    if (tableName == null) {
        setMetadataTableName(null);
        setMetadata(null);
    } else {
        setMetadataTableName(new Text(tableName));
        setMetadata(ingestConfiguration.createMetadata(getShardTableName(), getMetadataTableName(), null /* no load date table */,
                        getShardIndexTableName(), getShardReverseIndexTableName(), conf.getBoolean(ERROR_PROP_PREFIX + METADATA_TERM_FREQUENCY, false)));
    }
    tableName = conf.get(ERROR_PROP_PREFIX + SHARD_DINDX_NAME);
    setShardDictionaryIndexTableName(tableName == null ? null : new Text(tableName));
    
    try {
        defaultVisibility = flatten(markingFunctions.translateToColumnVisibility(markingsHelper.getDefaultMarkings()));
    } catch (Exception e) {
        throw new IllegalArgumentException("Failed to parse security marking configuration", e);
    }
    
    log.info("ShardedErrorDataTypeHandler configured.");
}
 
Example #16
Source File: GenerateDistCacheData.java    From hadoop with Apache License 2.0
/**
 * Returns a reader for this split of the distributed cache file list.
 */
@Override
public RecordReader<LongWritable, BytesWritable> createRecordReader(
    InputSplit split, final TaskAttemptContext taskContext)
    throws IOException, InterruptedException {
  return new SequenceFileRecordReader<LongWritable, BytesWritable>();
}
 
Example #17
Source File: InputSampler.java    From big-c with Apache License 2.0
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, Job job) 
    throws IOException, InterruptedException {
  List<InputSplit> splits = inf.getSplits(job);
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.size());
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    TaskAttemptContext samplingContext = new TaskAttemptContextImpl(
        job.getConfiguration(), new TaskAttemptID());
    RecordReader<K,V> reader = inf.createRecordReader(
        splits.get(i), samplingContext);
    reader.initialize(splits.get(i), samplingContext);
    while (reader.nextKeyValue()) {
      samples.add(ReflectionUtils.copy(job.getConfiguration(),
                                       reader.getCurrentKey(), null));
      ++records;
      if ((i+1) * samplesPerSplit <= records) {
        break;
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
 
Example #18
Source File: MneInputFormat.java    From mnemonic with Apache License 2.0
@Override
public RecordReader<NullWritable, MV> createRecordReader(InputSplit inputSplit,
                   TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  MneMapreduceRecordReader<MV, V> reader = new MneMapreduceRecordReader<MV, V>();
  reader.initialize(inputSplit, taskAttemptContext);
  return reader;
}
 
Example #19
Source File: PigOutputCommitter.java    From spork with Apache License 2.0
/**
 * @param context
 * @param mapStores 
 * @param reduceStores 
 * @throws IOException
 */
public PigOutputCommitter(TaskAttemptContext context,
        List<POStore> mapStores, List<POStore> reduceStores)
        throws IOException {
    // create and store the map and reduce output committers
    mapOutputCommitters = getCommitters(context, mapStores);
    reduceOutputCommitters = getCommitters(context, reduceStores);
    recoverySupported = context.getConfiguration().getBoolean(PigConfiguration.PIG_OUTPUT_COMMITTER_RECOVERY, false);
}
 
Example #20
Source File: NYCTLCColumnBasedHandlerTest.java    From datawave with Apache License 2.0
private NYCTLCReader getNYCTLCRecordReader(String file) throws IOException, URISyntaxException {
    InputSplit split = ColumnBasedHandlerTestUtil.getSplit(file);
    TaskAttemptContext ctx = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    TypeRegistry.reset();
    TypeRegistry.getInstance(ctx.getConfiguration());
    log.debug(TypeRegistry.getContents());
    NYCTLCReader reader = new NYCTLCReader();
    reader.initialize(split, ctx);
    return reader;
}
 
Example #21
Source File: JsonFileRecordReader.java    From jumbune with GNU Lesser General Public License v3.0
@Override
public void initialize(InputSplit arg0, TaskAttemptContext arg1)
		throws IOException, InterruptedException {
	startJsonCurlyTag = ("{").getBytes(Charsets.UTF_8);
	endJsonCurlyTag = ("}").getBytes(Charsets.UTF_8);
	startJsonSquareTag = ("[").getBytes(Charsets.UTF_8);
	endJsonSquareTag = ("]").getBytes(Charsets.UTF_8);
}
 
Example #22
Source File: DGALongEdgeValueInputFormatTest.java    From distributed-graph-analytics with Apache License 2.0
public EdgeReader<Text, LongWritable> createEdgeReader(final RecordReader<LongWritable,Text> rr) throws IOException {
    return new DGALongEdgeValueReader(){
        @Override
        protected RecordReader<LongWritable, Text> createLineRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
            return rr;
        }
    };
}
 
Example #23
Source File: MultiOutputFormat.java    From elasticsearch-hadoop with Apache License 2.0
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
    List<OutputFormat> formats = getNewApiFormats(CompatHandler.taskAttemptContext(context).getConfiguration());
    List<OutputCommitter> committers = new ArrayList<OutputCommitter>();
    for (OutputFormat format : formats) {
        committers.add(format.getOutputCommitter(context));
    }

    return new MultiNewOutputCommitter(committers);
}
 
Example #24
Source File: ColumnarSplitDataReader.java    From kylin with Apache License 2.0
public void init(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);

    FileSystem fs = FileSystem.get(context.getConfiguration());
    FileSplit fSplit = (FileSplit) split;
    Path path = fSplit.getPath();
    rowRecordReader = new RowRecordReader(cubeDesc, path, fs);
    metricsValuesBuffer = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);

    rowCount = new AtomicInteger(0);
}
 
Example #25
Source File: KeyIgnoringAnySAMOutputFormat.java    From Hadoop-BAM with MIT License
public RecordWriter<K,SAMRecordWritable> getRecordWriter(
		TaskAttemptContext ctx, Path out)
	throws IOException
{
	if (this.header == null)
		throw new IOException(
			"Can't create a RecordWriter without the SAM header");

	final boolean writeHeader = ctx.getConfiguration().getBoolean(
		WRITE_HEADER_PROPERTY, true);

	switch (format) {
		case BAM:
			return new KeyIgnoringBAMRecordWriter<K>(
				out, header, writeHeader, ctx);

		case SAM:
			return new KeyIgnoringSAMRecordWriter<K>(
					out, header, writeHeader, ctx);

		case CRAM:
			return new KeyIgnoringCRAMRecordWriter<K>(
					out, header, writeHeader, ctx);

		default: assert false; return null;
	}
}
 
Example #26
Source File: CombineShimRecordReader.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public void initialize(InputSplit curSplit, TaskAttemptContext curContext)
    throws IOException, InterruptedException {
  this.split = (CombineFileSplit) curSplit;
  this.context = curContext;

  if (null == rr) {
    createChildReader();
  }

  FileSplit fileSplit = new FileSplit(this.split.getPath(index),
      this.split.getOffset(index), this.split.getLength(index),
      this.split.getLocations());
  this.rr.initialize(fileSplit, this.context);
}
 
Example #27
Source File: MainframeDatasetRecordReader.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public void initialize(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {

  split = (MainframeDatasetInputSplit)inputSplit;
  conf = taskAttemptContext.getConfiguration();
  inputClass = (Class<T>) (conf.getClass(
              DBConfiguration.INPUT_CLASS_PROPERTY, null));
  key = null;
  datasetRecord = null;
  numberRecordRead = 0;
  datasetProcessed = 0;
}
 
Example #28
Source File: TestJobOutputCommitter.java    From hadoop with Apache License 2.0
public synchronized OutputCommitter getOutputCommitter(
    TaskAttemptContext context) throws IOException {
  if (committer == null) {
    Path output = getOutputPath(context);
    committer = new CommitterWithCustomAbort(output, context);
  }
  return committer;
}
 
Example #29
Source File: TestFileOutputCommitter.java    From hadoop with Apache License 2.0
private void writeMapFileOutput(RecordWriter theRecordWriter,
    TaskAttemptContext context) throws IOException, InterruptedException {
  try {
    int key = 0;
    for (int i = 0 ; i < 10; ++i) {
      key = i;
      Text val = (i%2 == 1) ? val1 : val2;
      theRecordWriter.write(new LongWritable(key),
          val);        
    }
  } finally {
    theRecordWriter.close(context);
  }
}
 
Example #30
Source File: CSVReaderBase.java    From datawave with Apache License 2.0
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
    super.initialize(genericSplit, context);
    setInputDate(System.currentTimeMillis());
    initializeRawFileName(genericSplit);
    initializeTotalSize(genericSplit);
}