org.apache.hadoop.mapred.RawKeyValueIterator Java Examples
The following examples show how to use
org.apache.hadoop.mapred.RawKeyValueIterator.
Each example is drawn from an open-source project; the originating project, source file, and license are noted above the code. Before the examples, a short sketch below shows the shape of the interface itself.
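RawKeyValueIterator is a small interface: next(), getKey(), getValue(), getProgress(), and close(), with keys and values exposed as serialized DataInputBuffers (the Tez adapters in Examples #13-#16 implement exactly these five methods). As a quick orientation, here is a minimal sketch of a custom implementation that walks an in-memory list of pre-serialized pairs. The class name ListBackedIterator and the RawPair holder are hypothetical and are not taken from any of the projects quoted below.

// Hypothetical, minimal RawKeyValueIterator backed by an in-memory list of
// pre-serialized key/value buffers. Illustration only.
import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.mapred.RawKeyValueIterator;
import org.apache.hadoop.util.Progress;

public class ListBackedIterator implements RawKeyValueIterator {

  /** Hypothetical holder for one serialized key/value pair. */
  public static class RawPair {
    final DataInputBuffer key;
    final DataInputBuffer value;
    public RawPair(DataInputBuffer key, DataInputBuffer value) {
      this.key = key;
      this.value = value;
    }
  }

  private final Iterator<RawPair> pairs;
  private final Progress progress = new Progress();
  private RawPair current;

  public ListBackedIterator(List<RawPair> backing) {
    this.pairs = backing.iterator();
  }

  @Override
  public boolean next() throws IOException {
    // Advance to the next serialized pair; report completion when exhausted.
    if (!pairs.hasNext()) {
      progress.complete();
      return false;
    }
    current = pairs.next();
    return true;
  }

  @Override
  public DataInputBuffer getKey() throws IOException {
    return current.key;
  }

  @Override
  public DataInputBuffer getValue() throws IOException {
    return current.value;
  }

  @Override
  public Progress getProgress() {
    // Only marked complete at the end, for simplicity of the sketch.
    return progress;
  }

  @Override
  public void close() throws IOException {
    // Nothing to release for an in-memory list.
  }
}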
Example #1
Source File: ReduceContext.java From hadoop-gpu with Apache License 2.0
public ReduceContext(Configuration conf, TaskAttemptID taskid,
                     RawKeyValueIterator input, Counter inputCounter,
                     RecordWriter<KEYOUT,VALUEOUT> output,
                     OutputCommitter committer, StatusReporter reporter,
                     RawComparator<KEYIN> comparator,
                     Class<KEYIN> keyClass, Class<VALUEIN> valueClass
                    ) throws InterruptedException, IOException {
  super(conf, taskid, output, committer, reporter);
  this.input = input;
  this.inputCounter = inputCounter;
  this.comparator = comparator;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(buffer);
  this.valueDeserializer = serializationFactory.getDeserializer(valueClass);
  this.valueDeserializer.open(buffer);
  hasMore = input.next();
}
Example #2
Source File: MergeManagerImpl.java From big-c with Apache License 2.0
private void combineAndSpill(
    RawKeyValueIterator kvIter,
    Counters.Counter inCounter) throws IOException {
  JobConf job = jobConf;
  Reducer combiner = ReflectionUtils.newInstance(combinerClass, job);
  Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
  Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
  RawComparator<K> comparator =
    (RawComparator<K>) job.getCombinerKeyGroupingComparator();
  try {
    CombineValuesIterator values = new CombineValuesIterator(
        kvIter, comparator, keyClass, valClass, job, Reporter.NULL,
        inCounter);
    while (values.more()) {
      combiner.reduce(values.getKey(), values, combineCollector,
          Reporter.NULL);
      values.nextKey();
    }
  } finally {
    combiner.close();
  }
}
Example #3
Source File: MergeManagerImpl.java From big-c with Apache License 2.0
@Override
public RawKeyValueIterator close() throws Throwable {
  // Wait for on-going merges to complete
  if (memToMemMerger != null) {
    memToMemMerger.close();
  }
  inMemoryMerger.close();
  onDiskMerger.close();

  List<InMemoryMapOutput<K, V>> memory =
    new ArrayList<InMemoryMapOutput<K, V>>(inMemoryMergedMapOutputs);
  inMemoryMergedMapOutputs.clear();
  memory.addAll(inMemoryMapOutputs);
  inMemoryMapOutputs.clear();
  List<CompressAwarePath> disk =
    new ArrayList<CompressAwarePath>(onDiskMapOutputs);
  onDiskMapOutputs.clear();
  return finalMerge(jobConf, rfs, memory, disk);
}
Example #4
Source File: ReduceContext.java From RDFS with Apache License 2.0
public ReduceContext(Configuration conf, TaskAttemptID taskid,
                     RawKeyValueIterator input,
                     Counter inputKeyCounter, Counter inputValueCounter,
                     RecordWriter<KEYOUT,VALUEOUT> output,
                     OutputCommitter committer, StatusReporter reporter,
                     RawComparator<KEYIN> comparator,
                     Class<KEYIN> keyClass, Class<VALUEIN> valueClass
                    ) throws InterruptedException, IOException {
  super(conf, taskid, output, committer, reporter);
  this.input = input;
  this.inputKeyCounter = inputKeyCounter;
  this.inputValueCounter = inputValueCounter;
  this.comparator = comparator;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(buffer);
  this.valueDeserializer = serializationFactory.getDeserializer(valueClass);
  this.valueDeserializer.open(buffer);
  hasMore = input.next();
}
Example #5
Source File: MergeManagerImpl.java From hadoop with Apache License 2.0
private void combineAndSpill(
    RawKeyValueIterator kvIter,
    Counters.Counter inCounter) throws IOException {
  JobConf job = jobConf;
  Reducer combiner = ReflectionUtils.newInstance(combinerClass, job);
  Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
  Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
  RawComparator<K> comparator =
    (RawComparator<K>) job.getCombinerKeyGroupingComparator();
  try {
    CombineValuesIterator values = new CombineValuesIterator(
        kvIter, comparator, keyClass, valClass, job, Reporter.NULL,
        inCounter);
    while (values.more()) {
      combiner.reduce(values.getKey(), values, combineCollector,
          Reporter.NULL);
      values.nextKey();
    }
  } finally {
    combiner.close();
  }
}
Example #6
Source File: MergeManagerImpl.java From hadoop with Apache License 2.0
@Override
public RawKeyValueIterator close() throws Throwable {
  // Wait for on-going merges to complete
  if (memToMemMerger != null) {
    memToMemMerger.close();
  }
  inMemoryMerger.close();
  onDiskMerger.close();

  List<InMemoryMapOutput<K, V>> memory =
    new ArrayList<InMemoryMapOutput<K, V>>(inMemoryMergedMapOutputs);
  inMemoryMergedMapOutputs.clear();
  memory.addAll(inMemoryMapOutputs);
  inMemoryMapOutputs.clear();
  List<CompressAwarePath> disk =
    new ArrayList<CompressAwarePath>(onDiskMapOutputs);
  onDiskMapOutputs.clear();
  return finalMerge(jobConf, rfs, memory, disk);
}
Example #7
Source File: Reducer.java From hadoop-gpu with Apache License 2.0
public Context(Configuration conf, TaskAttemptID taskid,
               RawKeyValueIterator input, Counter inputCounter,
               RecordWriter<KEYOUT,VALUEOUT> output,
               OutputCommitter committer, StatusReporter reporter,
               RawComparator<KEYIN> comparator,
               Class<KEYIN> keyClass, Class<VALUEIN> valueClass
              ) throws IOException, InterruptedException {
  super(conf, taskid, input, inputCounter, output, committer, reporter,
        comparator, keyClass, valueClass);
}
Example #8
Source File: MergeManagerImpl.java From hadoop with Apache License 2.0
@Override
public void merge(List<InMemoryMapOutput<K, V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }

  TaskAttemptID dummyMapId = inputs.get(0).getMapId();
  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize =
    createInMemorySegments(inputs, inMemorySegments, 0);
  int noInMemorySegments = inMemorySegments.size();

  InMemoryMapOutput<K, V> mergedMapOutputs =
    unconditionalReserve(dummyMapId, mergeOutputSize, false);

  Writer<K, V> writer =
    new InMemoryWriter<K, V>(mergedMapOutputs.getArrayStream());

  LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
           " segments of total-size: " + mergeOutputSize);

  RawKeyValueIterator rIter =
    Merger.merge(jobConf, rfs,
                 (Class<K>) jobConf.getMapOutputKeyClass(),
                 (Class<V>) jobConf.getMapOutputValueClass(),
                 inMemorySegments, inMemorySegments.size(),
                 new Path(reduceId.toString()),
                 (RawComparator<K>) jobConf.getOutputKeyComparator(),
                 reporter, null, null, null);
  Merger.writeFile(rIter, writer, reporter, jobConf);
  writer.close();

  LOG.info(reduceId +
           " Memory-to-Memory merge of the " + noInMemorySegments +
           " files in-memory complete.");

  // Note the output of the merge
  closeInMemoryMergedFile(mergedMapOutputs);
}
Example #9
Source File: Reducer.java From RDFS with Apache License 2.0
public Context(Configuration conf, TaskAttemptID taskid,
               RawKeyValueIterator input,
               Counter inputKeyCounter, Counter inputValueCounter,
               RecordWriter<KEYOUT,VALUEOUT> output,
               OutputCommitter committer, StatusReporter reporter,
               RawComparator<KEYIN> comparator,
               Class<KEYIN> keyClass, Class<VALUEIN> valueClass
              ) throws IOException, InterruptedException {
  super(conf, taskid, input, inputKeyCounter, inputValueCounter, output,
        committer, reporter, comparator, keyClass, valueClass);
}
Example #10
Source File: ReduceContextImpl.java From big-c with Apache License 2.0
public ReduceContextImpl(Configuration conf, TaskAttemptID taskid,
                         RawKeyValueIterator input,
                         Counter inputKeyCounter, Counter inputValueCounter,
                         RecordWriter<KEYOUT,VALUEOUT> output,
                         OutputCommitter committer, StatusReporter reporter,
                         RawComparator<KEYIN> comparator,
                         Class<KEYIN> keyClass, Class<VALUEIN> valueClass
                        ) throws InterruptedException, IOException {
  super(conf, taskid, output, committer, reporter);
  this.input = input;
  this.inputKeyCounter = inputKeyCounter;
  this.inputValueCounter = inputValueCounter;
  this.comparator = comparator;
  this.serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(buffer);
  this.valueDeserializer = serializationFactory.getDeserializer(valueClass);
  this.valueDeserializer.open(buffer);
  hasMore = input.next();
  this.keyClass = keyClass;
  this.valueClass = valueClass;
  this.conf = conf;
  this.taskid = taskid;
}
Example #11
Source File: MergeManagerImpl.java From big-c with Apache License 2.0
@Override
public void merge(List<InMemoryMapOutput<K, V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }

  TaskAttemptID dummyMapId = inputs.get(0).getMapId();
  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize =
    createInMemorySegments(inputs, inMemorySegments, 0);
  int noInMemorySegments = inMemorySegments.size();

  InMemoryMapOutput<K, V> mergedMapOutputs =
    unconditionalReserve(dummyMapId, mergeOutputSize, false);

  Writer<K, V> writer =
    new InMemoryWriter<K, V>(mergedMapOutputs.getArrayStream());

  LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
           " segments of total-size: " + mergeOutputSize);

  RawKeyValueIterator rIter =
    Merger.merge(jobConf, rfs,
                 (Class<K>) jobConf.getMapOutputKeyClass(),
                 (Class<V>) jobConf.getMapOutputValueClass(),
                 inMemorySegments, inMemorySegments.size(),
                 new Path(reduceId.toString()),
                 (RawComparator<K>) jobConf.getOutputKeyComparator(),
                 reporter, null, null, null);
  Merger.writeFile(rIter, writer, reporter, jobConf);
  writer.close();

  LOG.info(reduceId +
           " Memory-to-Memory merge of the " + noInMemorySegments +
           " files in-memory complete.");

  // Note the output of the merge
  closeInMemoryMergedFile(mergedMapOutputs);
}
Example #12
Source File: ReduceContextImpl.java From hadoop with Apache License 2.0
public ReduceContextImpl(Configuration conf, TaskAttemptID taskid,
                         RawKeyValueIterator input,
                         Counter inputKeyCounter, Counter inputValueCounter,
                         RecordWriter<KEYOUT,VALUEOUT> output,
                         OutputCommitter committer, StatusReporter reporter,
                         RawComparator<KEYIN> comparator,
                         Class<KEYIN> keyClass, Class<VALUEIN> valueClass
                        ) throws InterruptedException, IOException {
  super(conf, taskid, output, committer, reporter);
  this.input = input;
  this.inputKeyCounter = inputKeyCounter;
  this.inputValueCounter = inputValueCounter;
  this.comparator = comparator;
  this.serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(buffer);
  this.valueDeserializer = serializationFactory.getDeserializer(valueClass);
  this.valueDeserializer.open(buffer);
  hasMore = input.next();
  this.keyClass = keyClass;
  this.valueClass = valueClass;
  this.conf = conf;
  this.taskid = taskid;
}
Example #13
Source File: MRCombiner.java From tez with Apache License 2.0
private static <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
    org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context
    createReduceContext(
        Configuration conf, TaskAttemptID mrTaskAttemptID,
        final TezRawKeyValueIterator rawIter,
        Counter combineInputRecordsCounter,
        Counter combineOutputRecordsCounter,
        RecordWriter<KEYOUT, VALUEOUT> recordWriter,
        MRTaskReporter reporter, RawComparator<KEYIN> comparator,
        Class<KEYIN> keyClass, Class<VALUEIN> valClass)
        throws InterruptedException, IOException {

  // Adapt the Tez iterator to the mapred RawKeyValueIterator contract.
  RawKeyValueIterator r = new RawKeyValueIterator() {
    @Override
    public boolean next() throws IOException { return rawIter.next(); }
    @Override
    public DataInputBuffer getValue() throws IOException { return rawIter.getValue(); }
    @Override
    public Progress getProgress() { return rawIter.getProgress(); }
    @Override
    public DataInputBuffer getKey() throws IOException { return rawIter.getKey(); }
    @Override
    public void close() throws IOException { rawIter.close(); }
  };

  ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> rContext =
      new ReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
          conf, mrTaskAttemptID, r, null, combineInputRecordsCounter,
          recordWriter, null, reporter, comparator, keyClass, valClass);

  org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context reducerContext =
      new WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>()
          .getReducerContext(rContext);
  return reducerContext;
}
Example #14
Source File: MRTask.java From tez with Apache License 2.0
protected static <INKEY,INVALUE,OUTKEY,OUTVALUE>
    org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context
    createReduceContext(
        org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer,
        Configuration job, TaskAttemptID taskId,
        final TezRawKeyValueIterator rIter,
        org.apache.hadoop.mapreduce.Counter inputKeyCounter,
        org.apache.hadoop.mapreduce.Counter inputValueCounter,
        org.apache.hadoop.mapreduce.RecordWriter<OUTKEY,OUTVALUE> output,
        org.apache.hadoop.mapreduce.OutputCommitter committer,
        org.apache.hadoop.mapreduce.StatusReporter reporter,
        RawComparator<INKEY> comparator,
        Class<INKEY> keyClass, Class<INVALUE> valueClass
    ) throws IOException, InterruptedException {

  // Adapt the Tez iterator to the mapred RawKeyValueIterator contract.
  RawKeyValueIterator r = new RawKeyValueIterator() {
    @Override
    public boolean next() throws IOException { return rIter.next(); }
    @Override
    public DataInputBuffer getValue() throws IOException { return rIter.getValue(); }
    @Override
    public Progress getProgress() { return rIter.getProgress(); }
    @Override
    public DataInputBuffer getKey() throws IOException { return rIter.getKey(); }
    @Override
    public void close() throws IOException { rIter.close(); }
  };

  org.apache.hadoop.mapreduce.ReduceContext<INKEY, INVALUE, OUTKEY, OUTVALUE> reduceContext =
      new ReduceContextImpl<INKEY, INVALUE, OUTKEY, OUTVALUE>(
          job, taskId, r, inputKeyCounter, inputValueCounter, output,
          committer, reporter, comparator, keyClass, valueClass);

  if (LOG.isDebugEnabled()) {
    LOG.debug("Using key class: " + keyClass + ", valueClass: " + valueClass);
  }

  org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context reducerContext =
      new WrappedReducer<INKEY, INVALUE, OUTKEY, OUTVALUE>().getReducerContext(
          reduceContext);

  return reducerContext;
}
Example #15
Source File: MRCombiner.java From incubator-tez with Apache License 2.0
private static <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
    org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context
    createReduceContext(
        Configuration conf, TaskAttemptID mrTaskAttemptID,
        final TezRawKeyValueIterator rawIter,
        Counter combineInputKeyCounter,
        Counter combineInputValueCounter,
        RecordWriter<KEYOUT, VALUEOUT> recordWriter,
        MRTaskReporter reporter, RawComparator<KEYIN> comparator,
        Class<KEYIN> keyClass, Class<VALUEIN> valClass)
        throws InterruptedException, IOException {

  // Adapt the Tez iterator to the mapred RawKeyValueIterator contract.
  RawKeyValueIterator r = new RawKeyValueIterator() {
    @Override
    public boolean next() throws IOException { return rawIter.next(); }
    @Override
    public DataInputBuffer getValue() throws IOException { return rawIter.getValue(); }
    @Override
    public Progress getProgress() { return rawIter.getProgress(); }
    @Override
    public DataInputBuffer getKey() throws IOException { return rawIter.getKey(); }
    @Override
    public void close() throws IOException { rawIter.close(); }
  };

  ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> rContext =
      new ReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
          conf, mrTaskAttemptID, r, combineInputKeyCounter,
          combineInputValueCounter, recordWriter, null, reporter, comparator,
          keyClass, valClass);

  org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context reducerContext =
      new WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>()
          .getReducerContext(rContext);
  return reducerContext;
}
Example #16
Source File: MRTask.java From incubator-tez with Apache License 2.0
protected static <INKEY,INVALUE,OUTKEY,OUTVALUE>
    org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context
    createReduceContext(
        org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer,
        Configuration job, TaskAttemptID taskId,
        final TezRawKeyValueIterator rIter,
        org.apache.hadoop.mapreduce.Counter inputKeyCounter,
        org.apache.hadoop.mapreduce.Counter inputValueCounter,
        org.apache.hadoop.mapreduce.RecordWriter<OUTKEY,OUTVALUE> output,
        org.apache.hadoop.mapreduce.OutputCommitter committer,
        org.apache.hadoop.mapreduce.StatusReporter reporter,
        RawComparator<INKEY> comparator,
        Class<INKEY> keyClass, Class<INVALUE> valueClass
    ) throws IOException, InterruptedException {

  // Adapt the Tez iterator to the mapred RawKeyValueIterator contract.
  RawKeyValueIterator r = new RawKeyValueIterator() {
    @Override
    public boolean next() throws IOException { return rIter.next(); }
    @Override
    public DataInputBuffer getValue() throws IOException { return rIter.getValue(); }
    @Override
    public Progress getProgress() { return rIter.getProgress(); }
    @Override
    public DataInputBuffer getKey() throws IOException { return rIter.getKey(); }
    @Override
    public void close() throws IOException { rIter.close(); }
  };

  org.apache.hadoop.mapreduce.ReduceContext<INKEY, INVALUE, OUTKEY, OUTVALUE> reduceContext =
      new ReduceContextImpl<INKEY, INVALUE, OUTKEY, OUTVALUE>(
          job, taskId, r, inputKeyCounter, inputValueCounter, output,
          committer, reporter, comparator, keyClass, valueClass);

  if (LOG.isDebugEnabled()) {
    LOG.debug("Using key class: " + keyClass + ", valueClass: " + valueClass);
  }

  org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context reducerContext =
      new WrappedReducer<INKEY, INVALUE, OUTKEY, OUTVALUE>().getReducerContext(
          reduceContext);

  return reducerContext;
}
Example #17
Source File: TestGridMixClasses.java From big-c with Apache License 2.0
@Test (timeout=3000)
public void testSleepReducer() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(FileOutputFormat.COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  TaskAttemptID taskId = new TaskAttemptID();
  RawKeyValueIterator input = new FakeRawKeyValueReducerIterator();
  Counter counter = new GenericCounter();
  Counter inputValueCounter = new GenericCounter();
  RecordWriter<NullWritable, NullWritable> output = new LoadRecordReduceWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new DummyReporter();
  RawComparator<GridmixKey> comparator = new FakeRawComparator();

  ReduceContext<GridmixKey, NullWritable, NullWritable, NullWritable> reducecontext =
      new ReduceContextImpl<GridmixKey, NullWritable, NullWritable, NullWritable>(
          conf, taskId, input, counter, inputValueCounter, output, committer,
          reporter, comparator, GridmixKey.class, NullWritable.class);
  org.apache.hadoop.mapreduce.Reducer<GridmixKey, NullWritable, NullWritable, NullWritable>.Context context =
      new WrappedReducer<GridmixKey, NullWritable, NullWritable, NullWritable>()
          .getReducerContext(reducecontext);

  SleepReducer test = new SleepReducer();
  long start = System.currentTimeMillis();
  test.setup(context);
  long sleeper = context.getCurrentKey().getReduceOutputBytes();
  // status has been changed
  assertEquals("Sleeping... " + sleeper + " ms left", context.getStatus());
  // should sleep 0.9 sec
  assertTrue(System.currentTimeMillis() >= (start + sleeper));
  test.cleanup(context);
  // status has been changed again
  assertEquals("Slept for " + sleeper, context.getStatus());
}
Example #18
Source File: TestGridMixClasses.java From big-c with Apache License 2.0
@Test (timeout=3000)
public void testLoadJobLoadReducer() throws Exception {
  LoadJob.LoadReducer test = new LoadJob.LoadReducer();

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(FileOutputFormat.COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  TaskAttemptID taskid = new TaskAttemptID();
  RawKeyValueIterator input = new FakeRawKeyValueIterator();
  Counter counter = new GenericCounter();
  Counter inputValueCounter = new GenericCounter();
  LoadRecordWriter output = new LoadRecordWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new DummyReporter();
  RawComparator<GridmixKey> comparator = new FakeRawComparator();

  ReduceContext<GridmixKey, GridmixRecord, NullWritable, GridmixRecord> reduceContext =
      new ReduceContextImpl<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>(
          conf, taskid, input, counter, inputValueCounter, output, committer,
          reporter, comparator, GridmixKey.class, GridmixRecord.class);
  // read for previous data
  reduceContext.nextKeyValue();
  org.apache.hadoop.mapreduce.Reducer<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>.Context context =
      new WrappedReducer<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>()
          .getReducerContext(reduceContext);

  // test.setup(context);
  test.run(context);
  // 9 records have been read (-1 for previous)
  assertEquals(9, counter.getValue());
  assertEquals(10, inputValueCounter.getValue());
  assertEquals(1, output.getData().size());
  GridmixRecord record = output.getData().values().iterator().next();
  assertEquals(1593, record.getSize());
}
Example #19
Source File: TestMerger.java From big-c with Apache License 2.0
@SuppressWarnings( { "unchecked" })
public void testMergeShouldReturnProperProgress(
    List<Segment<Text, Text>> segments) throws IOException {

  Path tmpDir = new Path("localpath");
  Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
  Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
  RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
  Counter readsCounter = new Counter();
  Counter writesCounter = new Counter();
  Progress mergePhase = new Progress();
  RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass,
      valueClass, segments, 2, tmpDir, comparator, getReporter(),
      readsCounter, writesCounter, mergePhase);
  final float epsilon = 0.00001f;

  // Reading 6 keys total, 3 each in 2 segments, so each key read moves the
  // progress forward 1/6th of the way. Initially the first keys from each
  // segment have been read as part of the merge setup, so progress = 2/6.
  Assert.assertEquals(2/6.0f, mergeQueue.getProgress().get(), epsilon);

  // The first next() returns one of the keys already read during merge setup
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(2/6.0f, mergeQueue.getProgress().get(), epsilon);

  // Subsequent next() calls should read one key and move progress
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(3/6.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(4/6.0f, mergeQueue.getProgress().get(), epsilon);

  // At this point we've exhausted all of the keys in one segment
  // so getting the next key will return the already cached key from the
  // other segment
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(4/6.0f, mergeQueue.getProgress().get(), epsilon);

  // Subsequent next() calls should read one key and move progress
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(5/6.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);

  // Now there should be no more input
  Assert.assertFalse(mergeQueue.next());
  Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.getKey() == null);
  Assert.assertEquals(0, mergeQueue.getValue().getData().length);
}
Example #20
Source File: MergeManagerImpl.java From big-c with Apache License 2.0
@Override
public void merge(List<InMemoryMapOutput<K,V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }

  //name this output file same as the name of the first file that is
  //there in the current list of inmem files (this is guaranteed to
  //be absent on the disk currently. So we don't overwrite a prev.
  //created spill). Also we need to create the output file now since
  //it is not guaranteed that this file will be present after merge
  //is called (we delete empty files as soon as we see them
  //in the merge method)

  //figure out the mapId
  TaskAttemptID mapId = inputs.get(0).getMapId();
  TaskID mapTaskId = mapId.getTaskID();

  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize =
    createInMemorySegments(inputs, inMemorySegments, 0);
  int noInMemorySegments = inMemorySegments.size();

  Path outputPath =
    mapOutputFile.getInputFileForWrite(mapTaskId,
                                       mergeOutputSize).suffix(
                                           Task.MERGED_OUTPUT_PREFIX);

  FSDataOutputStream out =
    CryptoUtils.wrapIfNecessary(jobConf, rfs.create(outputPath));
  Writer<K, V> writer = new Writer<K, V>(jobConf, out,
      (Class<K>) jobConf.getMapOutputKeyClass(),
      (Class<V>) jobConf.getMapOutputValueClass(), codec, null, true);

  RawKeyValueIterator rIter = null;
  CompressAwarePath compressAwarePath;
  try {
    LOG.info("Initiating in-memory merge with " + noInMemorySegments +
             " segments...");

    rIter = Merger.merge(jobConf, rfs,
                         (Class<K>) jobConf.getMapOutputKeyClass(),
                         (Class<V>) jobConf.getMapOutputValueClass(),
                         inMemorySegments, inMemorySegments.size(),
                         new Path(reduceId.toString()),
                         (RawComparator<K>) jobConf.getOutputKeyComparator(),
                         reporter, spilledRecordsCounter, null, null);

    if (null == combinerClass) {
      Merger.writeFile(rIter, writer, reporter, jobConf);
    } else {
      combineCollector.setWriter(writer);
      combineAndSpill(rIter, reduceCombineInputCounter);
    }
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());

    LOG.info(reduceId +
        " Merge of the " + noInMemorySegments +
        " files in-memory complete." +
        " Local file is " + outputPath + " of size " +
        localFS.getFileStatus(outputPath).getLen());
  } catch (IOException e) {
    //make sure that we delete the ondisk file that we created
    //earlier when we invoked cloneFileAttributes
    localFS.delete(outputPath, true);
    throw e;
  }

  // Note the output of the merge
  closeOnDiskFile(compressAwarePath);
}
Example #21
Source File: TestShufflePlugin.java From big-c with Apache License 2.0
@Override
public RawKeyValueIterator run()
    throws java.io.IOException, java.lang.InterruptedException {
  return null;
}
Example #22
Source File: Shuffle.java From big-c with Apache License 2.0
@Override
public RawKeyValueIterator run() throws IOException, InterruptedException {
  // Scale the maximum events we fetch per RPC call to mitigate OOM issues
  // on the ApplicationMaster when a thundering herd of reducers fetch events
  // TODO: This should not be necessary after HADOOP-8942
  int eventsPerReducer = Math.max(MIN_EVENTS_TO_FETCH,
      MAX_RPC_OUTSTANDING_EVENTS / jobConf.getNumReduceTasks());
  int maxEventsToFetch = Math.min(MAX_EVENTS_TO_FETCH, eventsPerReducer);

  // Start the map-completion events fetcher thread
  final EventFetcher<K,V> eventFetcher =
    new EventFetcher<K,V>(reduceId, umbilical, scheduler, this,
        maxEventsToFetch);
  eventFetcher.start();

  // Start the map-output fetcher threads
  boolean isLocal = localMapFiles != null;
  final int numFetchers = isLocal ? 1 :
    jobConf.getInt(MRJobConfig.SHUFFLE_PARALLEL_COPIES, 5);
  Fetcher<K,V>[] fetchers = new Fetcher[numFetchers];
  if (isLocal) {
    fetchers[0] = new LocalFetcher<K, V>(jobConf, reduceId, scheduler,
        merger, reporter, metrics, this, reduceTask.getShuffleSecret(),
        localMapFiles);
    fetchers[0].start();
  } else {
    for (int i=0; i < numFetchers; ++i) {
      fetchers[i] = new Fetcher<K,V>(jobConf, reduceId, scheduler, merger,
                                     reporter, metrics, this,
                                     reduceTask.getShuffleSecret());
      fetchers[i].start();
    }
  }

  // Wait for shuffle to complete successfully
  while (!scheduler.waitUntilDone(PROGRESS_FREQUENCY)) {
    reporter.progress();

    synchronized (this) {
      if (throwable != null) {
        throw new ShuffleError("error in shuffle in " + throwingThreadName,
                               throwable);
      }
    }
  }

  // Stop the event-fetcher thread
  eventFetcher.shutDown();

  // Stop the map-output fetcher threads
  for (Fetcher<K,V> fetcher : fetchers) {
    fetcher.shutDown();
  }

  // stop the scheduler
  scheduler.close();

  copyPhase.complete(); // copy is already complete
  taskStatus.setPhase(TaskStatus.Phase.SORT);
  reduceTask.statusUpdate(umbilical);

  // Finish the on-going merges...
  RawKeyValueIterator kvIter = null;
  try {
    kvIter = merger.close();
  } catch (Throwable e) {
    throw new ShuffleError("Error while doing final merge " , e);
  }

  // Sanity check
  synchronized (this) {
    if (throwable != null) {
      throw new ShuffleError("error in shuffle in " + throwingThreadName,
                             throwable);
    }
  }

  return kvIter;
}
Example #23
Source File: MergeManagerImpl.java From big-c with Apache License 2.0
public RawKVIteratorReader(RawKeyValueIterator kvIter, long size)
    throws IOException {
  super(null, null, size, null, spilledRecordsCounter);
  this.kvIter = kvIter;
}
Example #24
Source File: MergeManagerImpl.java From big-c with Apache License 2.0
@Override
public void merge(List<CompressAwarePath> inputs) throws IOException {
  // sanity check
  if (inputs == null || inputs.isEmpty()) {
    LOG.info("No ondisk files to merge...");
    return;
  }

  long approxOutputSize = 0;
  int bytesPerSum =
    jobConf.getInt("io.bytes.per.checksum", 512);

  LOG.info("OnDiskMerger: We have " + inputs.size() +
           " map outputs on disk. Triggering merge...");

  // 1. Prepare the list of files to be merged.
  for (CompressAwarePath file : inputs) {
    approxOutputSize += localFS.getFileStatus(file).getLen();
  }

  // add the checksum length
  approxOutputSize +=
    ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

  // 2. Start the on-disk merge process
  Path outputPath =
    localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(),
        approxOutputSize, jobConf).suffix(Task.MERGED_OUTPUT_PREFIX);

  FSDataOutputStream out =
    CryptoUtils.wrapIfNecessary(jobConf, rfs.create(outputPath));
  Writer<K, V> writer = new Writer<K, V>(jobConf, out,
      (Class<K>) jobConf.getMapOutputKeyClass(),
      (Class<V>) jobConf.getMapOutputValueClass(), codec, null, true);

  RawKeyValueIterator iter = null;
  CompressAwarePath compressAwarePath;
  Path tmpDir = new Path(reduceId.toString());
  try {
    iter = Merger.merge(jobConf, rfs,
                        (Class<K>) jobConf.getMapOutputKeyClass(),
                        (Class<V>) jobConf.getMapOutputValueClass(),
                        codec, inputs.toArray(new Path[inputs.size()]),
                        true, ioSortFactor, tmpDir,
                        (RawComparator<K>) jobConf.getOutputKeyComparator(),
                        reporter, spilledRecordsCounter, null,
                        mergedMapOutputsCounter, null);

    Merger.writeFile(iter, writer, reporter, jobConf);
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());
  } catch (IOException e) {
    localFS.delete(outputPath, true);
    throw e;
  }

  closeOnDiskFile(compressAwarePath);

  LOG.info(reduceId +
      " Finished merging " + inputs.size() +
      " map output files on disk of total-size " +
      approxOutputSize + "." +
      " Local output file is " + outputPath + " of size " +
      localFS.getFileStatus(outputPath).getLen());
}
Example #25
Source File: TestGridMixClasses.java From hadoop with Apache License 2.0
@Test (timeout=3000)
public void testSleepReducer() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(FileOutputFormat.COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  TaskAttemptID taskId = new TaskAttemptID();
  RawKeyValueIterator input = new FakeRawKeyValueReducerIterator();
  Counter counter = new GenericCounter();
  Counter inputValueCounter = new GenericCounter();
  RecordWriter<NullWritable, NullWritable> output = new LoadRecordReduceWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new DummyReporter();
  RawComparator<GridmixKey> comparator = new FakeRawComparator();

  ReduceContext<GridmixKey, NullWritable, NullWritable, NullWritable> reducecontext =
      new ReduceContextImpl<GridmixKey, NullWritable, NullWritable, NullWritable>(
          conf, taskId, input, counter, inputValueCounter, output, committer,
          reporter, comparator, GridmixKey.class, NullWritable.class);
  org.apache.hadoop.mapreduce.Reducer<GridmixKey, NullWritable, NullWritable, NullWritable>.Context context =
      new WrappedReducer<GridmixKey, NullWritable, NullWritable, NullWritable>()
          .getReducerContext(reducecontext);

  SleepReducer test = new SleepReducer();
  long start = System.currentTimeMillis();
  test.setup(context);
  long sleeper = context.getCurrentKey().getReduceOutputBytes();
  // status has been changed
  assertEquals("Sleeping... " + sleeper + " ms left", context.getStatus());
  // should sleep 0.9 sec
  assertTrue(System.currentTimeMillis() >= (start + sleeper));
  test.cleanup(context);
  // status has been changed again
  assertEquals("Slept for " + sleeper, context.getStatus());
}
Example #26
Source File: TestGridMixClasses.java From hadoop with Apache License 2.0
@Test (timeout=3000)
public void testLoadJobLoadReducer() throws Exception {
  LoadJob.LoadReducer test = new LoadJob.LoadReducer();

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(FileOutputFormat.COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  TaskAttemptID taskid = new TaskAttemptID();
  RawKeyValueIterator input = new FakeRawKeyValueIterator();
  Counter counter = new GenericCounter();
  Counter inputValueCounter = new GenericCounter();
  LoadRecordWriter output = new LoadRecordWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new DummyReporter();
  RawComparator<GridmixKey> comparator = new FakeRawComparator();

  ReduceContext<GridmixKey, GridmixRecord, NullWritable, GridmixRecord> reduceContext =
      new ReduceContextImpl<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>(
          conf, taskid, input, counter, inputValueCounter, output, committer,
          reporter, comparator, GridmixKey.class, GridmixRecord.class);
  // read for previous data
  reduceContext.nextKeyValue();
  org.apache.hadoop.mapreduce.Reducer<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>.Context context =
      new WrappedReducer<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>()
          .getReducerContext(reduceContext);

  // test.setup(context);
  test.run(context);
  // 9 records have been read (-1 for previous)
  assertEquals(9, counter.getValue());
  assertEquals(10, inputValueCounter.getValue());
  assertEquals(1, output.getData().size());
  GridmixRecord record = output.getData().values().iterator().next();
  assertEquals(1593, record.getSize());
}
Example #27
Source File: TestMerger.java From hadoop with Apache License 2.0
@SuppressWarnings( { "unchecked" })
public void testMergeShouldReturnProperProgress(
    List<Segment<Text, Text>> segments) throws IOException {

  Path tmpDir = new Path("localpath");
  Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
  Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
  RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
  Counter readsCounter = new Counter();
  Counter writesCounter = new Counter();
  Progress mergePhase = new Progress();
  RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass,
      valueClass, segments, 2, tmpDir, comparator, getReporter(),
      readsCounter, writesCounter, mergePhase);
  final float epsilon = 0.00001f;

  // Reading 6 keys total, 3 each in 2 segments, so each key read moves the
  // progress forward 1/6th of the way. Initially the first keys from each
  // segment have been read as part of the merge setup, so progress = 2/6.
  Assert.assertEquals(2/6.0f, mergeQueue.getProgress().get(), epsilon);

  // The first next() returns one of the keys already read during merge setup
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(2/6.0f, mergeQueue.getProgress().get(), epsilon);

  // Subsequent next() calls should read one key and move progress
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(3/6.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(4/6.0f, mergeQueue.getProgress().get(), epsilon);

  // At this point we've exhausted all of the keys in one segment
  // so getting the next key will return the already cached key from the
  // other segment
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(4/6.0f, mergeQueue.getProgress().get(), epsilon);

  // Subsequent next() calls should read one key and move progress
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(5/6.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);

  // Now there should be no more input
  Assert.assertFalse(mergeQueue.next());
  Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.getKey() == null);
  Assert.assertEquals(0, mergeQueue.getValue().getData().length);
}
Example #28
Source File: TestShufflePlugin.java From hadoop with Apache License 2.0
@Override
public RawKeyValueIterator run()
    throws java.io.IOException, java.lang.InterruptedException {
  return null;
}
Example #29
Source File: Shuffle.java From hadoop with Apache License 2.0
@Override
public RawKeyValueIterator run() throws IOException, InterruptedException {
  // Scale the maximum events we fetch per RPC call to mitigate OOM issues
  // on the ApplicationMaster when a thundering herd of reducers fetch events
  // TODO: This should not be necessary after HADOOP-8942
  int eventsPerReducer = Math.max(MIN_EVENTS_TO_FETCH,
      MAX_RPC_OUTSTANDING_EVENTS / jobConf.getNumReduceTasks());
  int maxEventsToFetch = Math.min(MAX_EVENTS_TO_FETCH, eventsPerReducer);

  // Start the map-completion events fetcher thread
  final EventFetcher<K,V> eventFetcher =
    new EventFetcher<K,V>(reduceId, umbilical, scheduler, this,
        maxEventsToFetch);
  eventFetcher.start();

  // Start the map-output fetcher threads
  boolean isLocal = localMapFiles != null;
  final int numFetchers = isLocal ? 1 :
    jobConf.getInt(MRJobConfig.SHUFFLE_PARALLEL_COPIES, 5);
  Fetcher<K,V>[] fetchers = new Fetcher[numFetchers];
  if (isLocal) {
    fetchers[0] = new LocalFetcher<K, V>(jobConf, reduceId, scheduler,
        merger, reporter, metrics, this, reduceTask.getShuffleSecret(),
        localMapFiles);
    fetchers[0].start();
  } else {
    for (int i=0; i < numFetchers; ++i) {
      fetchers[i] = new Fetcher<K,V>(jobConf, reduceId, scheduler, merger,
                                     reporter, metrics, this,
                                     reduceTask.getShuffleSecret());
      fetchers[i].start();
    }
  }

  // Wait for shuffle to complete successfully
  while (!scheduler.waitUntilDone(PROGRESS_FREQUENCY)) {
    reporter.progress();

    synchronized (this) {
      if (throwable != null) {
        throw new ShuffleError("error in shuffle in " + throwingThreadName,
                               throwable);
      }
    }
  }

  // Stop the event-fetcher thread
  eventFetcher.shutDown();

  // Stop the map-output fetcher threads
  for (Fetcher<K,V> fetcher : fetchers) {
    fetcher.shutDown();
  }

  // stop the scheduler
  scheduler.close();

  copyPhase.complete(); // copy is already complete
  taskStatus.setPhase(TaskStatus.Phase.SORT);
  reduceTask.statusUpdate(umbilical);

  // Finish the on-going merges...
  RawKeyValueIterator kvIter = null;
  try {
    kvIter = merger.close();
  } catch (Throwable e) {
    throw new ShuffleError("Error while doing final merge " , e);
  }

  // Sanity check
  synchronized (this) {
    if (throwable != null) {
      throw new ShuffleError("error in shuffle in " + throwingThreadName,
                             throwable);
    }
  }

  return kvIter;
}
Example #30
Source File: MergeManagerImpl.java From hadoop with Apache License 2.0
public RawKVIteratorReader(RawKeyValueIterator kvIter, long size)
    throws IOException {
  super(null, null, size, null, spilledRecordsCounter);
  this.kvIter = kvIter;
}