org.apache.hadoop.mapreduce.TaskAttemptContext Java Examples
The following examples show how to use
org.apache.hadoop.mapreduce.TaskAttemptContext.
Each example is taken from an open-source project; the source file and license are noted above the code.
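A TaskAttemptContext is the per-attempt handle that Hadoop passes to a RecordReader, RecordWriter, or OutputCommitter; it exposes the job Configuration and the TaskAttemptID. Several of the test-oriented examples below construct one directly. The following minimal sketch (assuming Hadoop 2.x, where the concrete class lives at org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) shows that pattern outside a running job; the class name TaskAttemptContextSketch is illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextSketch {
    public static void main(String[] args) {
        // Wrap a Configuration and a dummy attempt id in a TaskAttemptContextImpl,
        // the same pattern the sampler and record-reader tests below use.
        Configuration conf = new Configuration();
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

        // The context carries the configuration and attempt id to whatever
        // RecordReader, RecordWriter, or OutputCommitter receives it.
        System.out.println("attempt id: " + context.getTaskAttemptID());
        System.out.println("io.file.buffer.size = "
            + context.getConfiguration().get("io.file.buffer.size"));
    }
}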
Example #1
Source File: WikipediaRecordReader.java From datawave with Apache License 2.0
private void initializeSuperClass(InputSplit split, TaskAttemptContext context) throws IOException {
    super.initialize(split, context);
    if (split instanceof FileSplit) {
        FileSplit fs = (FileSplit) split;
        Path p = fs.getPath();
        rawFileName = p.getName();
        if (log.isDebugEnabled()) {
            log.debug("FileSplit Info: ");
            log.debug("Start: " + fs.getStart());
            log.debug("Length: " + fs.getLength());
            log.debug("Locations: " + Arrays.toString(fs.getLocations()));
            log.debug("Path: " + fs.getPath());
        }
    } else {
        throw new IOException("Input Split unhandled.");
    }
}
Example #2
Source File: MapReduceParsedInputFormat.java From incubator-retired-mrql with Apache License 2.0
public ParsedRecordReader(FileSplit split,
                          TaskAttemptContext context,
                          Class<? extends Parser> parser_class,
                          Trees args) throws IOException {
    Configuration conf = context.getConfiguration();
    start = split.getStart();
    end = start + split.getLength();
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    fsin = fs.open(split.getPath());
    try {
        parser = parser_class.newInstance();
    } catch (Exception ex) {
        throw new Error("Unrecognized parser:" + parser_class);
    }
    parser.initialize(args);
    parser.open(fsin, start, end);
    result = null;
}
Example #3
Source File: DBOutputFormat.java From hadoop with Apache License 2.0
/** {@inheritDoc} */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
    DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
    String tableName = dbConf.getOutputTableName();
    String[] fieldNames = dbConf.getOutputFieldNames();

    if (fieldNames == null) {
        fieldNames = new String[dbConf.getOutputFieldCount()];
    }

    try {
        Connection connection = dbConf.getConnection();
        PreparedStatement statement = null;
        statement = connection.prepareStatement(
            constructQuery(tableName, fieldNames));
        return new DBRecordWriter(connection, statement);
    } catch (Exception ex) {
        throw new IOException(ex.getMessage());
    }
}
Example #4
Source File: UpdateOutputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public UpdateRecordWriter(TaskAttemptContext context)
    throws ClassNotFoundException, SQLException {
    super(context);

    Configuration conf = getConf();

    DBConfiguration dbConf = new DBConfiguration(conf);
    this.tableName = dbConf.getOutputTableName();
    this.columnNames = dbConf.getOutputFieldNames();
    String updateKeyColumns = conf.get(ExportJobBase.SQOOP_EXPORT_UPDATE_COL_KEY);

    Set<String> updateKeys = new LinkedHashSet<String>();
    StringTokenizer stok = new StringTokenizer(updateKeyColumns, ",");
    while (stok.hasMoreTokens()) {
        String nextUpdateKey = stok.nextToken().trim();
        if (nextUpdateKey.length() > 0) {
            updateKeys.add(nextUpdateKey);
        } else {
            throw new RuntimeException("Invalid update key column value specified"
                + ": '" + updateKeyColumns + "'");
        }
    }

    updateCols = updateKeys.toArray(new String[updateKeys.size()]);
}
Example #5
Source File: ArchiveWriter.java From marklogic-contentpump with Apache License 2.0
@Override
public void close(TaskAttemptContext arg0) throws IOException,
    InterruptedException {
    if (txtArchive != null) {
        txtArchive.close();
    }
    if (xmlArchive != null) {
        xmlArchive.close();
    }
    if (jsonArchive != null) {
        jsonArchive.close();
    }
    if (binaryArchive != null) {
        binaryArchive.close();
    }
}
Example #6
Source File: MrsPyramidRecordReader.java From mrgeo with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
    if (split instanceof MrsPyramidInputSplit) {
        MrsPyramidInputSplit fsplit = (MrsPyramidInputSplit) split;

        ifContext = ImageInputFormatContext.load(context.getConfiguration());
        if (ifContext.getBounds() != null) {
            inputBounds = ifContext.getBounds();
        }
        scannedInputReader = createRecordReader(fsplit, context);
        tilesize = ifContext.getTileSize();
        zoomLevel = ifContext.getZoomLevel();
    } else {
        throw new IOException("Got a split of type " + split.getClass().getCanonicalName() +
            " but expected one of type " + MrsPyramidInputSplit.class.getCanonicalName());
    }
}
Example #7
Source File: BulkInputFormat.java From datawave with Apache License 2.0
@Override
public RecordReader<Key,Value> createRecordReader(InputSplit split, TaskAttemptContext context) {
    return new RecordReaderBase<Key,Value>() {
        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (scannerIterator.hasNext()) {
                ++numKeysRead;
                Entry<Key,Value> entry = scannerIterator.next();
                currentK = currentKey = entry.getKey();
                currentV = currentValue = entry.getValue();
                if (log.isTraceEnabled())
                    log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
                return true;
            } else if (numKeysRead < 0) {
                numKeysRead = 0;
            }
            return false;
        }
    };
}
Example #8
Source File: TestFileOutputCommitter.java From big-c with Apache License 2.0
public void testInvalidVersionNumber() throws IOException {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 3);
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    try {
        new FileOutputCommitter(outDir, tContext);
        fail("should've thrown an exception!");
    } catch (IOException e) {
        // test passed
    }
}
Example #9
Source File: MultiTableOutputFormat.java From hbase with Apache License 2.0
@Override
public void close(TaskAttemptContext context) throws IOException {
    for (BufferedMutator mutator : mutatorMap.values()) {
        mutator.close();
    }
    if (connection != null) {
        connection.close();
    }
}
Example #10
Source File: TezGroupedSplitsInputFormat.java From incubator-tez with Apache License 2.0
@Override
public RecordReader<K, V> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
    TezGroupedSplit groupedSplit = (TezGroupedSplit) split;
    initInputFormatFromSplit(groupedSplit);
    return new TezGroupedSplitsRecordReader(groupedSplit, context);
}
Example #11
Source File: MapTask.java From RDFS with Apache License 2.0
@Override
public void close(TaskAttemptContext context)
    throws IOException, InterruptedException {
    try {
        collector.flush();
    } catch (ClassNotFoundException cnf) {
        throw new IOException("can't find class ", cnf);
    }
    collector.close();
}
Example #12
Source File: DatasetKeyInputFormat.java From kite with Apache License 2.0
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(
    value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR",
    justification = "Delegate set by setConf")
public RecordReader<E, Void> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    Configuration conf = Hadoop.TaskAttemptContext.getConfiguration.invoke(taskAttemptContext);
    DefaultConfiguration.init(conf);
    return delegate.createRecordReader(inputSplit, taskAttemptContext);
}
Example #13
Source File: TestLineRecordReader.java From hadoop with Apache License 2.0
@Test
public void testStripBOM() throws IOException {
    // the test data contains a BOM at the start of the file
    // confirm the BOM is skipped by LineRecordReader
    String UTF8_BOM = "\uFEFF";
    URL testFileUrl = getClass().getClassLoader().getResource("testBOM.txt");
    assertNotNull("Cannot find testBOM.txt", testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    Path testFilePath = new Path(testFile.getAbsolutePath());
    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
        Integer.MAX_VALUE);

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // read the data and check whether BOM is skipped
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);
    int numRecords = 0;
    boolean firstLine = true;
    boolean skipBOM = true;
    while (reader.nextKeyValue()) {
        if (firstLine) {
            firstLine = false;
            if (reader.getCurrentValue().toString().startsWith(UTF8_BOM)) {
                skipBOM = false;
            }
        }
        ++numRecords;
    }
    reader.close();

    assertTrue("BOM is not skipped", skipBOM);
}
Example #14
Source File: PgVectorRecordReader.java From mrgeo with Apache License 2.0
@SuppressFBWarnings(value = {"SQL_INJECTION_JDBC", "SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING"},
    justification = "User supplied queries are a requirement")
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
    if (!(split instanceof PgInputSplit)) {
        throw new IOException("Expected an instance of PgInputSplit");
    }
    offset = ((PgInputSplit) split).getOffset();
    limit = ((PgInputSplit) split).getLimit();
    currIndex = offset - 1;
    try {
        conn = PgVectorDataProvider.getDbConnection(dbSettings);
        // If the offset is < 0, then there is only one partition, so no need
        // for a limit query.
        String fullQuery = (offset < 0) ? dbSettings.getQuery() :
            (dbSettings.getQuery() + " OFFSET " + offset + " LIMIT " + limit);
        stmt = conn.prepareStatement(fullQuery,
            ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
        rs = ((PreparedStatement) stmt).executeQuery();
        ResultSetMetaData metadata = rs.getMetaData();
        columnCount = metadata.getColumnCount();
        columnLabels = new String[columnCount];
        for (int c = 1; c <= columnCount; c++) {
            columnLabels[c - 1] = metadata.getColumnLabel(c);
        }
    } catch (SQLException e) {
        throw new IOException("Could not open database.", e);
    }
}
Example #15
Source File: ErrorShardedDataTypeHandler.java From datawave with Apache License 2.0
@Override
public void setup(TaskAttemptContext context) {
    IngestConfiguration ingestConfiguration = IngestConfigurationFactory.getIngestConfiguration();
    markingsHelper = ingestConfiguration.getMarkingsHelper(context.getConfiguration(),
        TypeRegistry.getType(TypeRegistry.ERROR_PREFIX));

    super.setup(context);

    this.errorHelper = (ErrorShardedIngestHelper) (TypeRegistry.getType("error").getIngestHelper(context.getConfiguration()));
    this.errorHelper.setDelegateHelper(this.helper);
    this.helper = this.errorHelper;

    this.conf = context.getConfiguration();

    this.setupDictionaryCache(conf.getInt(ERROR_PROP_PREFIX + SHARD_DICTIONARY_CACHE_ENTRIES,
        ShardedDataTypeHandler.SHARD_DINDEX_CACHE_DEFAULT_SIZE));

    setShardTableName(new Text(ConfigurationHelper.isNull(conf, ERROR_PROP_PREFIX + SHARD_TNAME, String.class)));
    String tableName = conf.get(ERROR_PROP_PREFIX + SHARD_GIDX_TNAME);
    setShardIndexTableName(tableName == null ? null : new Text(tableName));
    tableName = conf.get(ERROR_PROP_PREFIX + SHARD_GRIDX_TNAME);
    setShardReverseIndexTableName(tableName == null ? null : new Text(tableName));
    tableName = conf.get(ERROR_PROP_PREFIX + METADATA_TABLE_NAME);
    if (tableName == null) {
        setMetadataTableName(null);
        setMetadata(null);
    } else {
        setMetadataTableName(new Text(tableName));
        setMetadata(ingestConfiguration.createMetadata(getShardTableName(), getMetadataTableName(),
            null /* no load date table */, getShardIndexTableName(), getShardReverseIndexTableName(),
            conf.getBoolean(ERROR_PROP_PREFIX + METADATA_TERM_FREQUENCY, false)));
    }
    tableName = conf.get(ERROR_PROP_PREFIX + SHARD_DINDX_NAME);
    setShardDictionaryIndexTableName(tableName == null ? null : new Text(tableName));

    try {
        defaultVisibility = flatten(markingFunctions.translateToColumnVisibility(markingsHelper.getDefaultMarkings()));
    } catch (Exception e) {
        throw new IllegalArgumentException("Failed to parse security marking configuration", e);
    }

    log.info("ShardedErrorDataTypeHandler configured.");
}
Example #16
Source File: GenerateDistCacheData.java From hadoop with Apache License 2.0
/**
 * Returns a reader for this split of the distributed cache file list.
 */
@Override
public RecordReader<LongWritable, BytesWritable> createRecordReader(
    InputSplit split, final TaskAttemptContext taskContext)
    throws IOException, InterruptedException {
    return new SequenceFileRecordReader<LongWritable, BytesWritable>();
}
Example #17
Source File: InputSampler.java From big-c with Apache License 2.0
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, Job job)
    throws IOException, InterruptedException {
    List<InputSplit> splits = inf.getSplits(job);
    ArrayList<K> samples = new ArrayList<K>(numSamples);
    int splitsToSample = Math.min(maxSplitsSampled, splits.size());
    int samplesPerSplit = numSamples / splitsToSample;
    long records = 0;
    for (int i = 0; i < splitsToSample; ++i) {
        TaskAttemptContext samplingContext = new TaskAttemptContextImpl(
            job.getConfiguration(), new TaskAttemptID());
        RecordReader<K,V> reader = inf.createRecordReader(
            splits.get(i), samplingContext);
        reader.initialize(splits.get(i), samplingContext);
        while (reader.nextKeyValue()) {
            samples.add(ReflectionUtils.copy(job.getConfiguration(),
                reader.getCurrentKey(), null));
            ++records;
            if ((i + 1) * samplesPerSplit <= records) {
                break;
            }
        }
        reader.close();
    }
    return (K[]) samples.toArray();
}
Example #18
Source File: MneInputFormat.java From mnemonic with Apache License 2.0
@Override
public RecordReader<NullWritable, MV> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    MneMapreduceRecordReader<MV, V> reader = new MneMapreduceRecordReader<MV, V>();
    reader.initialize(inputSplit, taskAttemptContext);
    return reader;
}
Example #19
Source File: PigOutputCommitter.java From spork with Apache License 2.0
/**
 * @param context
 * @param mapStores
 * @param reduceStores
 * @throws IOException
 */
public PigOutputCommitter(TaskAttemptContext context,
    List<POStore> mapStores, List<POStore> reduceStores)
    throws IOException {
    // create and store the map and reduce output committers
    mapOutputCommitters = getCommitters(context, mapStores);
    reduceOutputCommitters = getCommitters(context, reduceStores);
    recoverySupported = context.getConfiguration().getBoolean(
        PigConfiguration.PIG_OUTPUT_COMMITTER_RECOVERY, false);
}
Example #20
Source File: NYCTLCColumnBasedHandlerTest.java From datawave with Apache License 2.0
private NYCTLCReader getNYCTLCRecordReader(String file) throws IOException, URISyntaxException {
    InputSplit split = ColumnBasedHandlerTestUtil.getSplit(file);
    TaskAttemptContext ctx = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    TypeRegistry.reset();
    TypeRegistry.getInstance(ctx.getConfiguration());
    log.debug(TypeRegistry.getContents());
    NYCTLCReader reader = new NYCTLCReader();
    reader.initialize(split, ctx);
    return reader;
}
Example #21
Source File: JsonFileRecordReader.java From jumbune with GNU Lesser General Public License v3.0
@Override
public void initialize(InputSplit arg0, TaskAttemptContext arg1)
    throws IOException, InterruptedException {
    startJsonCurlyTag = ("{").getBytes(Charsets.UTF_8);
    endJsonCurlyTag = ("}").getBytes(Charsets.UTF_8);
    startJsonSquareTag = ("[").getBytes(Charsets.UTF_8);
    endJsonSquareTag = ("]").getBytes(Charsets.UTF_8);
}
Example #22
Source File: DGALongEdgeValueInputFormatTest.java From distributed-graph-analytics with Apache License 2.0
public EdgeReader<Text, LongWritable> createEdgeReader(final RecordReader<LongWritable,Text> rr) throws IOException {
    return new DGALongEdgeValueReader() {
        @Override
        protected RecordReader<LongWritable, Text> createLineRecordReader(InputSplit inputSplit,
            TaskAttemptContext context) throws IOException, InterruptedException {
            return rr;
        }
    };
}
Example #23
Source File: MultiOutputFormat.java From elasticsearch-hadoop with Apache License 2.0
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
    List<OutputFormat> formats = getNewApiFormats(CompatHandler.taskAttemptContext(context).getConfiguration());
    List<OutputCommitter> committers = new ArrayList<OutputCommitter>();
    for (OutputFormat format : formats) {
        committers.add(format.getOutputCommitter(context));
    }
    return new MultiNewOutputCommitter(committers);
}
Example #24
Source File: ColumnarSplitDataReader.java From kylin with Apache License 2.0
public void init(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);

    FileSystem fs = FileSystem.get(context.getConfiguration());
    FileSplit fSplit = (FileSplit) split;
    Path path = fSplit.getPath();
    rowRecordReader = new RowRecordReader(cubeDesc, path, fs);
    metricsValuesBuffer = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);

    rowCount = new AtomicInteger(0);
}
Example #25
Source File: KeyIgnoringAnySAMOutputFormat.java From Hadoop-BAM with MIT License
public RecordWriter<K,SAMRecordWritable> getRecordWriter(
        TaskAttemptContext ctx, Path out) throws IOException {
    if (this.header == null)
        throw new IOException(
            "Can't create a RecordWriter without the SAM header");

    final boolean writeHeader = ctx.getConfiguration().getBoolean(
        WRITE_HEADER_PROPERTY, true);

    switch (format) {
        case BAM:
            return new KeyIgnoringBAMRecordWriter<K>(out, header, writeHeader, ctx);
        case SAM:
            return new KeyIgnoringSAMRecordWriter<K>(out, header, writeHeader, ctx);
        case CRAM:
            return new KeyIgnoringCRAMRecordWriter<K>(out, header, writeHeader, ctx);
        default:
            assert false;
            return null;
    }
}
Example #26
Source File: CombineShimRecordReader.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public void initialize(InputSplit curSplit, TaskAttemptContext curContext)
    throws IOException, InterruptedException {
    this.split = (CombineFileSplit) curSplit;
    this.context = curContext;

    if (null == rr) {
        createChildReader();
    }

    FileSplit fileSplit = new FileSplit(this.split.getPath(index),
        this.split.getOffset(index), this.split.getLength(index),
        this.split.getLocations());
    this.rr.initialize(fileSplit, this.context);
}
Example #27
Source File: MainframeDatasetRecordReader.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
    split = (MainframeDatasetInputSplit) inputSplit;
    conf = taskAttemptContext.getConfiguration();
    inputClass = (Class<T>) (conf.getClass(
        DBConfiguration.INPUT_CLASS_PROPERTY, null));
    key = null;
    datasetRecord = null;
    numberRecordRead = 0;
    datasetProcessed = 0;
}
Example #28
Source File: TestJobOutputCommitter.java From hadoop with Apache License 2.0
public synchronized OutputCommitter getOutputCommitter(
    TaskAttemptContext context) throws IOException {
    if (committer == null) {
        Path output = getOutputPath(context);
        committer = new CommitterWithCustomAbort(output, context);
    }
    return committer;
}
Example #29
Source File: TestFileOutputCommitter.java From hadoop with Apache License 2.0
private void writeMapFileOutput(RecordWriter theRecordWriter,
    TaskAttemptContext context) throws IOException, InterruptedException {
    try {
        int key = 0;
        for (int i = 0; i < 10; ++i) {
            key = i;
            Text val = (i % 2 == 1) ? val1 : val2;
            theRecordWriter.write(new LongWritable(key), val);
        }
    } finally {
        theRecordWriter.close(context);
    }
}
Example #30
Source File: CSVReaderBase.java From datawave with Apache License 2.0
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
    super.initialize(genericSplit, context);
    setInputDate(System.currentTimeMillis());
    initializeRawFileName(genericSplit);
    initializeTotalSize(genericSplit);
}