org.apache.hadoop.mapreduce.TaskAttemptContext Java Examples
The following examples show how to use
org.apache.hadoop.mapreduce.TaskAttemptContext.
Each example is taken from an open-source project; the source file and license are noted above the code.
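A TaskAttemptContext is the per-attempt handle that Hadoop passes to a RecordReader, RecordWriter, or OutputCommitter; it exposes the job Configuration and the TaskAttemptID. Several of the test-oriented examples below construct one directly. The following minimal sketch (assuming Hadoop 2.x, where the concrete class lives at org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) shows that pattern outside a running job; the class name TaskAttemptContextSketch is illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextSketch {
    public static void main(String[] args) {
        // Wrap a Configuration and a dummy attempt id in a TaskAttemptContextImpl,
        // the same pattern the sampler and record-reader tests below use.
        Configuration conf = new Configuration();
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

        // The context carries the configuration and attempt id to whatever
        // RecordReader, RecordWriter, or OutputCommitter receives it.
        System.out.println("attempt id: " + context.getTaskAttemptID());
        System.out.println("io.file.buffer.size = "
            + context.getConfiguration().get("io.file.buffer.size"));
    }
}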
Example #1
Source File: WikipediaRecordReader.java From datawave with Apache License 2.0
private void initializeSuperClass(InputSplit split, TaskAttemptContext context) throws IOException {
    super.initialize(split, context);
    if (split instanceof FileSplit) {
        FileSplit fs = (FileSplit) split;
        Path p = fs.getPath();
        rawFileName = p.getName();
        if (log.isDebugEnabled()) {
            log.debug("FileSplit Info: ");
            log.debug("Start: " + fs.getStart());
            log.debug("Length: " + fs.getLength());
            log.debug("Locations: " + Arrays.toString(fs.getLocations()));
            log.debug("Path: " + fs.getPath());
        }
    } else {
        throw new IOException("Input Split unhandled.");
    }
}
Example #2
Source File: MapReduceParsedInputFormat.java From incubator-retired-mrql with Apache License 2.0
public ParsedRecordReader(FileSplit split,
                          TaskAttemptContext context,
                          Class<? extends Parser> parser_class,
                          Trees args) throws IOException {
    Configuration conf = context.getConfiguration();
    start = split.getStart();
    end = start + split.getLength();
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    fsin = fs.open(split.getPath());
    try {
        parser = parser_class.newInstance();
    } catch (Exception ex) {
        throw new Error("Unrecognized parser:" + parser_class);
    }
    parser.initialize(args);
    parser.open(fsin, start, end);
    result = null;
}
Example #3
Source File: DBOutputFormat.java From hadoop with Apache License 2.0
/** {@inheritDoc} */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
    DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
    String tableName = dbConf.getOutputTableName();
    String[] fieldNames = dbConf.getOutputFieldNames();

    if (fieldNames == null) {
        fieldNames = new String[dbConf.getOutputFieldCount()];
    }

    try {
        Connection connection = dbConf.getConnection();
        PreparedStatement statement = null;
        statement = connection.prepareStatement(
            constructQuery(tableName, fieldNames));
        return new DBRecordWriter(connection, statement);
    } catch (Exception ex) {
        throw new IOException(ex.getMessage());
    }
}
Example #4
Source File: UpdateOutputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public UpdateRecordWriter(TaskAttemptContext context)
    throws ClassNotFoundException, SQLException {
    super(context);

    Configuration conf = getConf();

    DBConfiguration dbConf = new DBConfiguration(conf);
    this.tableName = dbConf.getOutputTableName();
    this.columnNames = dbConf.getOutputFieldNames();
    String updateKeyColumns = conf.get(ExportJobBase.SQOOP_EXPORT_UPDATE_COL_KEY);

    Set<String> updateKeys = new LinkedHashSet<String>();
    StringTokenizer stok = new StringTokenizer(updateKeyColumns, ",");
    while (stok.hasMoreTokens()) {
        String nextUpdateKey = stok.nextToken().trim();
        if (nextUpdateKey.length() > 0) {
            updateKeys.add(nextUpdateKey);
        } else {
            throw new RuntimeException("Invalid update key column value specified"
                + ": '" + updateKeyColumns + "'");
        }
    }

    updateCols = updateKeys.toArray(new String[updateKeys.size()]);
}
Example #5
Source File: ArchiveWriter.java From marklogic-contentpump with Apache License 2.0
@Override
public void close(TaskAttemptContext arg0) throws IOException,
    InterruptedException {
    if (txtArchive != null) {
        txtArchive.close();
    }
    if (xmlArchive != null) {
        xmlArchive.close();
    }
    if (jsonArchive != null) {
        jsonArchive.close();
    }
    if (binaryArchive != null) {
        binaryArchive.close();
    }
}
Example #6
Source File: MrsPyramidRecordReader.java From mrgeo with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
    if (split instanceof MrsPyramidInputSplit) {
        MrsPyramidInputSplit fsplit = (MrsPyramidInputSplit) split;

        ifContext = ImageInputFormatContext.load(context.getConfiguration());
        if (ifContext.getBounds() != null) {
            inputBounds = ifContext.getBounds();
        }
        scannedInputReader = createRecordReader(fsplit, context);
        tilesize = ifContext.getTileSize();
        zoomLevel = ifContext.getZoomLevel();
    } else {
        throw new IOException("Got a split of type " + split.getClass().getCanonicalName() +
            " but expected one of type " + MrsPyramidInputSplit.class.getCanonicalName());
    }
}
Example #7
Source File: BulkInputFormat.java From datawave with Apache License 2.0
@Override
public RecordReader<Key,Value> createRecordReader(InputSplit split, TaskAttemptContext context) {
    return new RecordReaderBase<Key,Value>() {
        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (scannerIterator.hasNext()) {
                ++numKeysRead;
                Entry<Key,Value> entry = scannerIterator.next();
                currentK = currentKey = entry.getKey();
                currentV = currentValue = entry.getValue();
                if (log.isTraceEnabled())
                    log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
                return true;
            } else if (numKeysRead < 0) {
                numKeysRead = 0;
            }
            return false;
        }
    };
}
Example #8
Source File: TestFileOutputCommitter.java From big-c with Apache License 2.0
public void testInvalidVersionNumber() throws IOException {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 3);
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    try {
        new FileOutputCommitter(outDir, tContext);
        fail("should've thrown an exception!");
    } catch (IOException e) {
        // test passed
    }
}
Example #9
Source File: MultiTableOutputFormat.java From hbase with Apache License 2.0
@Override
public void close(TaskAttemptContext context) throws IOException {
    for (BufferedMutator mutator : mutatorMap.values()) {
        mutator.close();
    }
    if (connection != null) {
        connection.close();
    }
}
Example #10
Source File: TezGroupedSplitsInputFormat.java From incubator-tez with Apache License 2.0
@Override
public RecordReader<K, V> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
    TezGroupedSplit groupedSplit = (TezGroupedSplit) split;
    initInputFormatFromSplit(groupedSplit);
    return new TezGroupedSplitsRecordReader(groupedSplit, context);
}
Example #11
Source File: MapTask.java From RDFS with Apache License 2.0
@Override
public void close(TaskAttemptContext context)
    throws IOException, InterruptedException {
    try {
        collector.flush();
    } catch (ClassNotFoundException cnf) {
        throw new IOException("can't find class ", cnf);
    }
    collector.close();
}
Example #12
Source File: DatasetKeyInputFormat.java From kite with Apache License 2.0
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(
    value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR",
    justification = "Delegate set by setConf")
public RecordReader<E, Void> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    Configuration conf = Hadoop.TaskAttemptContext.getConfiguration.invoke(taskAttemptContext);
    DefaultConfiguration.init(conf);
    return delegate.createRecordReader(inputSplit, taskAttemptContext);
}
Example #13
Source File: TestLineRecordReader.java From hadoop with Apache License 2.0
@Test
public void testStripBOM() throws IOException {
    // the test data contains a BOM at the start of the file
    // confirm the BOM is skipped by LineRecordReader
    String UTF8_BOM = "\uFEFF";
    URL testFileUrl = getClass().getClassLoader().getResource("testBOM.txt");
    assertNotNull("Cannot find testBOM.txt", testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    Path testFilePath = new Path(testFile.getAbsolutePath());
    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
        Integer.MAX_VALUE);

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // read the data and check whether BOM is skipped
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);
    int numRecords = 0;
    boolean firstLine = true;
    boolean skipBOM = true;
    while (reader.nextKeyValue()) {
        if (firstLine) {
            firstLine = false;
            if (reader.getCurrentValue().toString().startsWith(UTF8_BOM)) {
                skipBOM = false;
            }
        }
        ++numRecords;
    }
    reader.close();

    assertTrue("BOM is not skipped", skipBOM);
}
Example #14
Source File: PgVectorRecordReader.java From mrgeo with Apache License 2.0
@SuppressFBWarnings(value = {"SQL_INJECTION_JDBC", "SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING"},
    justification = "User supplied queries are a requirement")
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
    if (!(split instanceof PgInputSplit)) {
        throw new IOException("Expected an instance of PgInputSplit");
    }
    offset = ((PgInputSplit) split).getOffset();
    limit = ((PgInputSplit) split).getLimit();
    currIndex = offset - 1;
    try {
        conn = PgVectorDataProvider.getDbConnection(dbSettings);
        // If the offset is < 0, then there is only one partition, so no need
        // for a limit query.
        String fullQuery = (offset < 0) ? dbSettings.getQuery() :
            (dbSettings.getQuery() + " OFFSET " + offset + " LIMIT " + limit);
        stmt = conn.prepareStatement(fullQuery,
            ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
        rs = ((PreparedStatement) stmt).executeQuery();
        ResultSetMetaData metadata = rs.getMetaData();
        columnCount = metadata.getColumnCount();
        columnLabels = new String[columnCount];
        for (int c = 1; c <= columnCount; c++) {
            columnLabels[c - 1] = metadata.getColumnLabel(c);
        }
    } catch (SQLException e) {
        throw new IOException("Could not open database.", e);
    }
}
Example #15
Source File: ErrorShardedDataTypeHandler.java From datawave with Apache License 2.0
@Override
public void setup(TaskAttemptContext context) {
    IngestConfiguration ingestConfiguration = IngestConfigurationFactory.getIngestConfiguration();
    markingsHelper = ingestConfiguration.getMarkingsHelper(context.getConfiguration(),
        TypeRegistry.getType(TypeRegistry.ERROR_PREFIX));

    super.setup(context);

    this.errorHelper = (ErrorShardedIngestHelper) (TypeRegistry.getType("error").getIngestHelper(context.getConfiguration()));
    this.errorHelper.setDelegateHelper(this.helper);
    this.helper = this.errorHelper;

    this.conf = context.getConfiguration();

    this.setupDictionaryCache(conf.getInt(ERROR_PROP_PREFIX + SHARD_DICTIONARY_CACHE_ENTRIES,
        ShardedDataTypeHandler.SHARD_DINDEX_CACHE_DEFAULT_SIZE));

    setShardTableName(new Text(ConfigurationHelper.isNull(conf, ERROR_PROP_PREFIX + SHARD_TNAME, String.class)));
    String tableName = conf.get(ERROR_PROP_PREFIX + SHARD_GIDX_TNAME);
    setShardIndexTableName(tableName == null ? null : new Text(tableName));
    tableName = conf.get(ERROR_PROP_PREFIX + SHARD_GRIDX_TNAME);
    setShardReverseIndexTableName(tableName == null ? null : new Text(tableName));
    tableName = conf.get(ERROR_PROP_PREFIX + METADATA_TABLE_NAME);
    if (tableName == null) {
        setMetadataTableName(null);
        setMetadata(null);
    } else {
        setMetadataTableName(new Text(tableName));
        setMetadata(ingestConfiguration.createMetadata(getShardTableName(), getMetadataTableName(),
            null /* no load date table */, getShardIndexTableName(), getShardReverseIndexTableName(),
            conf.getBoolean(ERROR_PROP_PREFIX + METADATA_TERM_FREQUENCY, false)));
    }
    tableName = conf.get(ERROR_PROP_PREFIX + SHARD_DINDX_NAME);
    setShardDictionaryIndexTableName(tableName == null ? null : new Text(tableName));

    try {
        defaultVisibility = flatten(markingFunctions.translateToColumnVisibility(markingsHelper.getDefaultMarkings()));
    } catch (Exception e) {
        throw new IllegalArgumentException("Failed to parse security marking configuration", e);
    }

    log.info("ShardedErrorDataTypeHandler configured.");
}
Example #16
Source File: GenerateDistCacheData.java From hadoop with Apache License 2.0
/**
 * Returns a reader for this split of the distributed cache file list.
 */
@Override
public RecordReader<LongWritable, BytesWritable> createRecordReader(
    InputSplit split, final TaskAttemptContext taskContext)
    throws IOException, InterruptedException {
    return new SequenceFileRecordReader<LongWritable, BytesWritable>();
}
Example #17
Source File: InputSampler.java From big-c with Apache License 2.0
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, Job job)
    throws IOException, InterruptedException {
    List<InputSplit> splits = inf.getSplits(job);
    ArrayList<K> samples = new ArrayList<K>(numSamples);
    int splitsToSample = Math.min(maxSplitsSampled, splits.size());
    int samplesPerSplit = numSamples / splitsToSample;
    long records = 0;
    for (int i = 0; i < splitsToSample; ++i) {
        TaskAttemptContext samplingContext = new TaskAttemptContextImpl(
            job.getConfiguration(), new TaskAttemptID());
        RecordReader<K,V> reader = inf.createRecordReader(
            splits.get(i), samplingContext);
        reader.initialize(splits.get(i), samplingContext);
        while (reader.nextKeyValue()) {
            samples.add(ReflectionUtils.copy(job.getConfiguration(),
                reader.getCurrentKey(), null));
            ++records;
            if ((i + 1) * samplesPerSplit <= records) {
                break;
            }
        }
        reader.close();
    }
    return (K[]) samples.toArray();
}
Example #18
Source File: MneInputFormat.java From mnemonic with Apache License 2.0
@Override
public RecordReader<NullWritable, MV> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    MneMapreduceRecordReader<MV, V> reader = new MneMapreduceRecordReader<MV, V>();
    reader.initialize(inputSplit, taskAttemptContext);
    return reader;
}
Example #19
Source File: PigOutputCommitter.java From spork with Apache License 2.0
/**
 * @param context
 * @param mapStores
 * @param reduceStores
 * @throws IOException
 */
public PigOutputCommitter(TaskAttemptContext context,
    List<POStore> mapStores, List<POStore> reduceStores)
    throws IOException {
    // create and store the map and reduce output committers
    mapOutputCommitters = getCommitters(context, mapStores);
    reduceOutputCommitters = getCommitters(context, reduceStores);
    recoverySupported = context.getConfiguration().getBoolean(
        PigConfiguration.PIG_OUTPUT_COMMITTER_RECOVERY, false);
}
Example #20
Source File: NYCTLCColumnBasedHandlerTest.java From datawave with Apache License 2.0
private NYCTLCReader getNYCTLCRecordReader(String file) throws IOException, URISyntaxException {
    InputSplit split = ColumnBasedHandlerTestUtil.getSplit(file);
    TaskAttemptContext ctx = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    TypeRegistry.reset();
    TypeRegistry.getInstance(ctx.getConfiguration());
    log.debug(TypeRegistry.getContents());
    NYCTLCReader reader = new NYCTLCReader();
    reader.initialize(split, ctx);
    return reader;
}
Example #21
Source File: JsonFileRecordReader.java From jumbune with GNU Lesser General Public License v3.0
@Override
public void initialize(InputSplit arg0, TaskAttemptContext arg1)
    throws IOException, InterruptedException {
    startJsonCurlyTag = ("{").getBytes(Charsets.UTF_8);
    endJsonCurlyTag = ("}").getBytes(Charsets.UTF_8);
    startJsonSquareTag = ("[").getBytes(Charsets.UTF_8);
    endJsonSquareTag = ("]").getBytes(Charsets.UTF_8);
}
Example #22
Source File: DGALongEdgeValueInputFormatTest.java From distributed-graph-analytics with Apache License 2.0
public EdgeReader<Text, LongWritable> createEdgeReader(final RecordReader<LongWritable,Text> rr) throws IOException {
    return new DGALongEdgeValueReader() {
        @Override
        protected RecordReader<LongWritable, Text> createLineRecordReader(InputSplit inputSplit,
            TaskAttemptContext context) throws IOException, InterruptedException {
            return rr;
        }
    };
}
Example #23
Source File: MultiOutputFormat.java From elasticsearch-hadoop with Apache License 2.0
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
    List<OutputFormat> formats = getNewApiFormats(CompatHandler.taskAttemptContext(context).getConfiguration());
    List<OutputCommitter> committers = new ArrayList<OutputCommitter>();
    for (OutputFormat format : formats) {
        committers.add(format.getOutputCommitter(context));
    }
    return new MultiNewOutputCommitter(committers);
}
Example #24
Source File: ColumnarSplitDataReader.java From kylin with Apache License 2.0
public void init(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);

    FileSystem fs = FileSystem.get(context.getConfiguration());
    FileSplit fSplit = (FileSplit) split;
    Path path = fSplit.getPath();
    rowRecordReader = new RowRecordReader(cubeDesc, path, fs);
    metricsValuesBuffer = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);

    rowCount = new AtomicInteger(0);
}
Example #25
Source File: KeyIgnoringAnySAMOutputFormat.java From Hadoop-BAM with MIT License
public RecordWriter<K,SAMRecordWritable> getRecordWriter(
        TaskAttemptContext ctx, Path out) throws IOException {
    if (this.header == null)
        throw new IOException(
            "Can't create a RecordWriter without the SAM header");

    final boolean writeHeader = ctx.getConfiguration().getBoolean(
        WRITE_HEADER_PROPERTY, true);

    switch (format) {
        case BAM:
            return new KeyIgnoringBAMRecordWriter<K>(out, header, writeHeader, ctx);
        case SAM:
            return new KeyIgnoringSAMRecordWriter<K>(out, header, writeHeader, ctx);
        case CRAM:
            return new KeyIgnoringCRAMRecordWriter<K>(out, header, writeHeader, ctx);
        default:
            assert false;
            return null;
    }
}
Example #26
Source File: CombineShimRecordReader.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public void initialize(InputSplit curSplit, TaskAttemptContext curContext)
    throws IOException, InterruptedException {
    this.split = (CombineFileSplit) curSplit;
    this.context = curContext;

    if (null == rr) {
        createChildReader();
    }

    FileSplit fileSplit = new FileSplit(this.split.getPath(index),
        this.split.getOffset(index), this.split.getLength(index),
        this.split.getLocations());
    this.rr.initialize(fileSplit, this.context);
}
Example #27
Source File: MainframeDatasetRecordReader.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
    split = (MainframeDatasetInputSplit) inputSplit;
    conf = taskAttemptContext.getConfiguration();
    inputClass = (Class<T>) (conf.getClass(
        DBConfiguration.INPUT_CLASS_PROPERTY, null));
    key = null;
    datasetRecord = null;
    numberRecordRead = 0;
    datasetProcessed = 0;
}
Example #28
Source File: TestJobOutputCommitter.java From hadoop with Apache License 2.0
public synchronized OutputCommitter getOutputCommitter(
    TaskAttemptContext context) throws IOException {
    if (committer == null) {
        Path output = getOutputPath(context);
        committer = new CommitterWithCustomAbort(output, context);
    }
    return committer;
}
Example #29
Source File: TestFileOutputCommitter.java From hadoop with Apache License 2.0
private void writeMapFileOutput(RecordWriter theRecordWriter,
    TaskAttemptContext context) throws IOException, InterruptedException {
    try {
        int key = 0;
        for (int i = 0; i < 10; ++i) {
            key = i;
            Text val = (i % 2 == 1) ? val1 : val2;
            theRecordWriter.write(new LongWritable(key), val);
        }
    } finally {
        theRecordWriter.close(context);
    }
}
Example #30
Source File: CSVReaderBase.java From datawave with Apache License 2.0
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
    super.initialize(genericSplit, context);
    setInputDate(System.currentTimeMillis());
    initializeRawFileName(genericSplit);
    initializeTotalSize(genericSplit);
}