Java Code Examples for org.apache.hadoop.mapreduce.MapReduceTestUtil#createDummyMapTaskAttemptContext()
The following examples show how to use
org.apache.hadoop.mapreduce.MapReduceTestUtil#createDummyMapTaskAttemptContext().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestMRKeyValueTextInputFormat.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Reads every record of {@code split} through {@code format} and returns a
 * copy of each value.
 *
 * @param format input format used to create the record reader
 * @param split  the split to read
 * @param job    job whose configuration is handed to the reader
 * @return copies of the values, in the order the reader produced them
 * @throws IOException if the reader fails to initialize, read or close
 * @throws InterruptedException if the reader is interrupted
 */
private static List<Text> readSplit(KeyValueTextInputFormat format,
    InputSplit split, Job job) throws IOException, InterruptedException {
  List<Text> result = new ArrayList<Text>();
  Configuration conf = job.getConfiguration();
  // A single dummy context serves both the reader and the map context.
  TaskAttemptContext context =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(conf);
  RecordReader<Text, Text> reader = format.createRecordReader(split, context);
  MapContext<Text, Text, Text, Text> mcontext =
      new MapContextImpl<Text, Text, Text, Text>(conf,
          context.getTaskAttemptID(), reader, null, null,
          MapReduceTestUtil.createDummyReporter(), split);
  reader.initialize(split, mcontext);
  try {
    while (reader.nextKeyValue()) {
      // Store a copy rather than the instance handed out by the reader.
      result.add(new Text(reader.getCurrentValue()));
    }
  } finally {
    // Release the reader even when reading a record fails.
    reader.close();
  }
  return result;
}
Example 2
Source File: TestCombineTextInputFormat.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Reads every record of {@code split} through {@code format} and returns a
 * copy of each value.
 *
 * @param format input format used to create the record reader
 * @param split  the split to read
 * @param job    job whose configuration is handed to the reader
 * @return copies of the values, in the order the reader produced them
 * @throws IOException if the reader fails to initialize, read or close
 * @throws InterruptedException if the reader is interrupted
 */
private static List<Text> readSplit(InputFormat<LongWritable,Text> format,
    InputSplit split, Job job) throws IOException, InterruptedException {
  List<Text> result = new ArrayList<Text>();
  Configuration conf = job.getConfiguration();
  // A single dummy context serves both the reader and the map context.
  TaskAttemptContext context =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(conf);
  RecordReader<LongWritable, Text> reader =
      format.createRecordReader(split, context);
  MapContext<LongWritable,Text,LongWritable,Text> mcontext =
      new MapContextImpl<LongWritable,Text,LongWritable,Text>(conf,
          context.getTaskAttemptID(), reader, null, null,
          MapReduceTestUtil.createDummyReporter(), split);
  reader.initialize(split, mcontext);
  try {
    while (reader.nextKeyValue()) {
      // Store a copy rather than the instance handed out by the reader.
      result.add(new Text(reader.getCurrentValue()));
    }
  } finally {
    // The reader was previously never closed; release it on every path.
    reader.close();
  }
  return result;
}
Example 3
Source File: TestCombineTextInputFormat.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Reads every record of {@code split} through {@code format} and returns a
 * copy of each value.
 *
 * @param format input format used to create the record reader
 * @param split  the split to read
 * @param job    job whose configuration is handed to the reader
 * @return copies of the values, in the order the reader produced them
 * @throws IOException if the reader fails to initialize, read or close
 * @throws InterruptedException if the reader is interrupted
 */
private static List<Text> readSplit(InputFormat<LongWritable,Text> format,
    InputSplit split, Job job) throws IOException, InterruptedException {
  List<Text> result = new ArrayList<Text>();
  Configuration conf = job.getConfiguration();
  // A single dummy context serves both the reader and the map context.
  TaskAttemptContext context =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(conf);
  RecordReader<LongWritable, Text> reader =
      format.createRecordReader(split, context);
  MapContext<LongWritable,Text,LongWritable,Text> mcontext =
      new MapContextImpl<LongWritable,Text,LongWritable,Text>(conf,
          context.getTaskAttemptID(), reader, null, null,
          MapReduceTestUtil.createDummyReporter(), split);
  reader.initialize(split, mcontext);
  try {
    while (reader.nextKeyValue()) {
      // Store a copy rather than the instance handed out by the reader.
      result.add(new Text(reader.getCurrentValue()));
    }
  } finally {
    // The reader was previously never closed; release it on every path.
    reader.close();
  }
  return result;
}
Example 4
Source File: TestFixedLengthInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
/** * Test with record length set to 0 */ @Test (timeout=5000) public void testZeroRecordLength() throws Exception { localFs.delete(workDir, true); Path file = new Path(workDir, new String("testFormat.txt")); createFile(file, null, 10, 10); Job job = Job.getInstance(defaultConf); // Set the fixed length record length config property FixedLengthInputFormat format = new FixedLengthInputFormat(); format.setRecordLength(job.getConfiguration(), 0); FileInputFormat.setInputPaths(job, workDir); List<InputSplit> splits = format.getSplits(job); boolean exceptionThrown = false; for (InputSplit split : splits) { try { TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext( job.getConfiguration()); RecordReader<LongWritable, BytesWritable> reader = format.createRecordReader(split, context); MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext = new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split); reader.initialize(split, mcontext); } catch(IOException ioe) { exceptionThrown = true; LOG.info("Exception message:" + ioe.getMessage()); } } assertTrue("Exception for zero record length:", exceptionThrown); }
Example 5
Source File: TestMRSequenceFileInputFilter.java From big-c with Apache License 2.0 | 5 votes |
private int countRecords(int numSplits) throws IOException, InterruptedException { InputFormat<Text, BytesWritable> format = new SequenceFileInputFilter<Text, BytesWritable>(); if (numSplits == 0) { numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1; } FileInputFormat.setMaxInputSplitSize(job, fs.getFileStatus(inFile).getLen() / numSplits); TaskAttemptContext context = MapReduceTestUtil. createDummyMapTaskAttemptContext(job.getConfiguration()); // check each split int count = 0; for (InputSplit split : format.getSplits(job)) { RecordReader<Text, BytesWritable> reader = format.createRecordReader(split, context); MapContext<Text, BytesWritable, Text, BytesWritable> mcontext = new MapContextImpl<Text, BytesWritable, Text, BytesWritable>( job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split); reader.initialize(split, mcontext); try { while (reader.nextKeyValue()) { LOG.info("Accept record " + reader.getCurrentKey().toString()); count++; } } finally { reader.close(); } } return count; }
Example 6
Source File: TestFixedLengthInputFormat.java From big-c with Apache License 2.0 | 5 votes |
/** * Test with no record length set. */ @Test (timeout=5000) public void testNoRecordLength() throws Exception { localFs.delete(workDir, true); Path file = new Path(workDir, new String("testFormat.txt")); createFile(file, null, 10, 10); // Create the job and do not set fixed record length Job job = Job.getInstance(defaultConf); FileInputFormat.setInputPaths(job, workDir); FixedLengthInputFormat format = new FixedLengthInputFormat(); List<InputSplit> splits = format.getSplits(job); boolean exceptionThrown = false; for (InputSplit split : splits) { try { TaskAttemptContext context = MapReduceTestUtil. createDummyMapTaskAttemptContext(job.getConfiguration()); RecordReader<LongWritable, BytesWritable> reader = format.createRecordReader(split, context); MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext = new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split); reader.initialize(split, mcontext); } catch(IOException ioe) { exceptionThrown = true; LOG.info("Exception message:" + ioe.getMessage()); } } assertTrue("Exception for not setting record length:", exceptionThrown); }
Example 7
Source File: TestDistCacheEmulation.java From hadoop with Apache License 2.0 | 5 votes |
/** * Validate setupGenerateDistCacheData by validating <li>permissions of the * distributed cache directories and <li>content of the generated sequence * file. This includes validation of dist cache file paths and their file * sizes. */ private void validateSetupGenDC(Configuration jobConf, long[] sortedFileSizes) throws IOException, InterruptedException { // build things needed for validation long sumOfFileSizes = 0; for (int i = 0; i < sortedFileSizes.length; i++) { sumOfFileSizes += sortedFileSizes[i]; } FileSystem fs = FileSystem.get(jobConf); assertEquals("Number of distributed cache files to be generated is wrong.", sortedFileSizes.length, jobConf.getInt(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_COUNT, -1)); assertEquals("Total size of dist cache files to be generated is wrong.", sumOfFileSizes, jobConf.getLong(GenerateDistCacheData.GRIDMIX_DISTCACHE_BYTE_COUNT, -1)); Path filesListFile = new Path( jobConf.get(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_LIST)); FileStatus stat = fs.getFileStatus(filesListFile); assertEquals("Wrong permissions of dist Cache files list file " + filesListFile, new FsPermission((short) 0644), stat.getPermission()); InputSplit split = new FileSplit(filesListFile, 0, stat.getLen(), (String[]) null); TaskAttemptContext taskContext = MapReduceTestUtil .createDummyMapTaskAttemptContext(jobConf); RecordReader<LongWritable, BytesWritable> reader = new GenerateDistCacheData.GenDCDataFormat() .createRecordReader(split, taskContext); MapContext<LongWritable, BytesWritable, NullWritable, BytesWritable> mapContext = new MapContextImpl<LongWritable, BytesWritable, NullWritable, BytesWritable>( jobConf, taskContext.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split); reader.initialize(split, mapContext); // start validating setupGenerateDistCacheData doValidateSetupGenDC(reader, fs, sortedFileSizes); }
Example 8
Source File: TestDistCacheEmulation.java From big-c with Apache License 2.0 | 5 votes |
/** * Validate setupGenerateDistCacheData by validating <li>permissions of the * distributed cache directories and <li>content of the generated sequence * file. This includes validation of dist cache file paths and their file * sizes. */ private void validateSetupGenDC(Configuration jobConf, long[] sortedFileSizes) throws IOException, InterruptedException { // build things needed for validation long sumOfFileSizes = 0; for (int i = 0; i < sortedFileSizes.length; i++) { sumOfFileSizes += sortedFileSizes[i]; } FileSystem fs = FileSystem.get(jobConf); assertEquals("Number of distributed cache files to be generated is wrong.", sortedFileSizes.length, jobConf.getInt(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_COUNT, -1)); assertEquals("Total size of dist cache files to be generated is wrong.", sumOfFileSizes, jobConf.getLong(GenerateDistCacheData.GRIDMIX_DISTCACHE_BYTE_COUNT, -1)); Path filesListFile = new Path( jobConf.get(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_LIST)); FileStatus stat = fs.getFileStatus(filesListFile); assertEquals("Wrong permissions of dist Cache files list file " + filesListFile, new FsPermission((short) 0644), stat.getPermission()); InputSplit split = new FileSplit(filesListFile, 0, stat.getLen(), (String[]) null); TaskAttemptContext taskContext = MapReduceTestUtil .createDummyMapTaskAttemptContext(jobConf); RecordReader<LongWritable, BytesWritable> reader = new GenerateDistCacheData.GenDCDataFormat() .createRecordReader(split, taskContext); MapContext<LongWritable, BytesWritable, NullWritable, BytesWritable> mapContext = new MapContextImpl<LongWritable, BytesWritable, NullWritable, BytesWritable>( jobConf, taskContext.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split); reader.initialize(split, mapContext); // start validating setupGenerateDistCacheData doValidateSetupGenDC(reader, fs, sortedFileSizes); }
Example 9
Source File: TestFixedLengthInputFormat.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Reads all records of {@code split} and returns each value as a string
 * built from the value's valid bytes.
 *
 * @param format input format used to create the record reader
 * @param split  the split to read
 * @param job    job whose configuration is handed to the reader
 * @return one string per record, in the order the reader produced them
 * @throws Exception if the reader fails to initialize, read or close
 */
private static List<String> readSplit(FixedLengthInputFormat format,
    InputSplit split, Job job) throws Exception {
  List<String> records = new ArrayList<String>();
  TaskAttemptContext attemptContext =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(
          job.getConfiguration());
  RecordReader<LongWritable, BytesWritable> reader =
      format.createRecordReader(split, attemptContext);
  MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable>
      mapContext =
      new MapContextImpl<LongWritable, BytesWritable, LongWritable,
          BytesWritable>(job.getConfiguration(),
          attemptContext.getTaskAttemptID(), reader, null, null,
          MapReduceTestUtil.createDummyReporter(), split);
  try {
    reader.initialize(split, mapContext);
    while (reader.nextKeyValue()) {
      // The key is fetched as before but is not part of the result.
      LongWritable currentKey = reader.getCurrentKey();
      BytesWritable currentValue = reader.getCurrentValue();
      // Only the first getLength() bytes of the backing array are used.
      byte[] backing = currentValue.getBytes();
      records.add(new String(backing, 0, currentValue.getLength()));
    }
  } finally {
    reader.close();
  }
  return records;
}
Example 10
Source File: TestFixedLengthInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
/** * Test with no record length set. */ @Test (timeout=5000) public void testNoRecordLength() throws Exception { localFs.delete(workDir, true); Path file = new Path(workDir, new String("testFormat.txt")); createFile(file, null, 10, 10); // Create the job and do not set fixed record length Job job = Job.getInstance(defaultConf); FileInputFormat.setInputPaths(job, workDir); FixedLengthInputFormat format = new FixedLengthInputFormat(); List<InputSplit> splits = format.getSplits(job); boolean exceptionThrown = false; for (InputSplit split : splits) { try { TaskAttemptContext context = MapReduceTestUtil. createDummyMapTaskAttemptContext(job.getConfiguration()); RecordReader<LongWritable, BytesWritable> reader = format.createRecordReader(split, context); MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext = new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split); reader.initialize(split, mcontext); } catch(IOException ioe) { exceptionThrown = true; LOG.info("Exception message:" + ioe.getMessage()); } } assertTrue("Exception for not setting record length:", exceptionThrown); }
Example 11
Source File: TestMRSequenceFileInputFilter.java From hadoop with Apache License 2.0 | 5 votes |
private int countRecords(int numSplits) throws IOException, InterruptedException { InputFormat<Text, BytesWritable> format = new SequenceFileInputFilter<Text, BytesWritable>(); if (numSplits == 0) { numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1; } FileInputFormat.setMaxInputSplitSize(job, fs.getFileStatus(inFile).getLen() / numSplits); TaskAttemptContext context = MapReduceTestUtil. createDummyMapTaskAttemptContext(job.getConfiguration()); // check each split int count = 0; for (InputSplit split : format.getSplits(job)) { RecordReader<Text, BytesWritable> reader = format.createRecordReader(split, context); MapContext<Text, BytesWritable, Text, BytesWritable> mcontext = new MapContextImpl<Text, BytesWritable, Text, BytesWritable>( job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split); reader.initialize(split, mcontext); try { while (reader.nextKeyValue()) { LOG.info("Accept record " + reader.getCurrentKey().toString()); count++; } } finally { reader.close(); } } return count; }
Example 12
Source File: TestFixedLengthInputFormat.java From big-c with Apache License 2.0 | 5 votes |
/** * Test with record length set to 0 */ @Test (timeout=5000) public void testZeroRecordLength() throws Exception { localFs.delete(workDir, true); Path file = new Path(workDir, new String("testFormat.txt")); createFile(file, null, 10, 10); Job job = Job.getInstance(defaultConf); // Set the fixed length record length config property FixedLengthInputFormat format = new FixedLengthInputFormat(); format.setRecordLength(job.getConfiguration(), 0); FileInputFormat.setInputPaths(job, workDir); List<InputSplit> splits = format.getSplits(job); boolean exceptionThrown = false; for (InputSplit split : splits) { try { TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext( job.getConfiguration()); RecordReader<LongWritable, BytesWritable> reader = format.createRecordReader(split, context); MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext = new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split); reader.initialize(split, mcontext); } catch(IOException ioe) { exceptionThrown = true; LOG.info("Exception message:" + ioe.getMessage()); } } assertTrue("Exception for zero record length:", exceptionThrown); }
Example 13
Source File: TestMRSequenceFileAsBinaryOutputFormat.java From hadoop with Apache License 2.0 | 4 votes |
/**
 * Round-trips {@code RECORDS} random int/double pairs. The pairs are
 * serialized into byte buffers and written with
 * {@code SequenceFileAsBinaryOutputFormat}. They are then read back with
 * {@code SequenceFileInputFormat} and compared against the same random
 * sequence, regenerated from the saved seed.
 *
 * @throws IOException if writing, committing or reading fails
 * @throws InterruptedException if the writer or a reader is interrupted
 */
public void testBinary() throws IOException, InterruptedException {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);

  Path outdir = new Path(System.getProperty("test.build.data", "/tmp"),
      "outseq");
  Random rng = new Random();
  long seed = rng.nextLong();
  rng.setSeed(seed);

  FileOutputFormat.setOutputPath(job, outdir);

  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job,
      IntWritable.class);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job,
      DoubleWritable.class);

  SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job,
      CompressionType.BLOCK);

  BytesWritable rawKey = new BytesWritable();
  BytesWritable rawValue = new BytesWritable();

  TaskAttemptContext context =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(
          job.getConfiguration());
  OutputFormat<BytesWritable, BytesWritable> outputFormat =
      new SequenceFileAsBinaryOutputFormat();
  OutputCommitter committer = outputFormat.getOutputCommitter(context);
  committer.setupJob(job);
  RecordWriter<BytesWritable, BytesWritable> writer =
      outputFormat.getRecordWriter(context);

  DataOutputBuffer buffer = new DataOutputBuffer();
  LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
  try {
    for (int written = 0; written < RECORDS; ++written) {
      // Serialize the int key, hand its bytes to rawKey, then reuse buffer.
      IntWritable intKey = new IntWritable(rng.nextInt());
      intKey.write(buffer);
      rawKey.set(buffer.getData(), 0, buffer.getLength());
      buffer.reset();

      // Same for the double value.
      DoubleWritable doubleValue = new DoubleWritable(rng.nextDouble());
      doubleValue.write(buffer);
      rawValue.set(buffer.getData(), 0, buffer.getLength());
      buffer.reset();

      writer.write(rawKey, rawValue);
    }
  } finally {
    writer.close(context);
  }
  committer.commitTask(context);
  committer.commitJob(job);

  InputFormat<IntWritable, DoubleWritable> iformat =
      new SequenceFileInputFormat<IntWritable, DoubleWritable>();
  int count = 0;
  // Restart the generator so the expected values replay in write order.
  rng.setSeed(seed);
  SequenceFileInputFormat.setInputPaths(job, outdir);
  LOG.info("Reading data by SequenceFileInputFormat");
  for (InputSplit split : iformat.getSplits(job)) {
    RecordReader<IntWritable, DoubleWritable> reader =
        iformat.createRecordReader(split, context);
    MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable>
        mcontext =
        new MapContextImpl<IntWritable, DoubleWritable,
            BytesWritable, BytesWritable>(job.getConfiguration(),
            context.getTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    try {
      while (reader.nextKeyValue()) {
        int sourceInt = rng.nextInt();
        double sourceDouble = rng.nextDouble();
        IntWritable readKey = reader.getCurrentKey();
        DoubleWritable readValue = reader.getCurrentValue();
        assertEquals(
            "Keys don't match: " + "*" + readKey.get() + ":" +
                sourceInt + "*",
            sourceInt, readKey.get());
        assertTrue(
            "Vals don't match: " + "*" + readValue.get() + ":" +
                sourceDouble + "*",
            Double.compare(readValue.get(), sourceDouble) == 0);
        ++count;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, count);
}
Example 14
Source File: TestCombineTextInputFormat.java From big-c with Apache License 2.0 | 4 votes |
@Test(timeout=10000) public void testFormat() throws Exception { Job job = Job.getInstance(new Configuration(defaultConf)); Random random = new Random(); long seed = random.nextLong(); LOG.info("seed = " + seed); random.setSeed(seed); localFs.delete(workDir, true); FileInputFormat.setInputPaths(job, workDir); final int length = 10000; final int numFiles = 10; // create files with various lengths createFiles(length, numFiles, random); // create a combined split for the files CombineTextInputFormat format = new CombineTextInputFormat(); for (int i = 0; i < 3; i++) { int numSplits = random.nextInt(length/20) + 1; LOG.info("splitting: requesting = " + numSplits); List<InputSplit> splits = format.getSplits(job); LOG.info("splitting: got = " + splits.size()); // we should have a single split as the length is comfortably smaller than // the block size assertEquals("We got more than one splits!", 1, splits.size()); InputSplit split = splits.get(0); assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass()); // check the split BitSet bits = new BitSet(length); LOG.debug("split= " + split); TaskAttemptContext context = MapReduceTestUtil. 
createDummyMapTaskAttemptContext(job.getConfiguration()); RecordReader<LongWritable, Text> reader = format.createRecordReader(split, context); assertEquals("reader class is CombineFileRecordReader.", CombineFileRecordReader.class, reader.getClass()); MapContext<LongWritable,Text,LongWritable,Text> mcontext = new MapContextImpl<LongWritable,Text,LongWritable,Text>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split); reader.initialize(split, mcontext); try { int count = 0; while (reader.nextKeyValue()) { LongWritable key = reader.getCurrentKey(); assertNotNull("Key should not be null.", key); Text value = reader.getCurrentValue(); final int v = Integer.parseInt(value.toString()); LOG.debug("read " + v); assertFalse("Key in multiple partitions.", bits.get(v)); bits.set(v); count++; } LOG.debug("split=" + split + " count=" + count); } finally { reader.close(); } assertEquals("Some keys in no partition.", length, bits.cardinality()); } }
Example 15
Source File: TestCombineSequenceFileInputFormat.java From hadoop with Apache License 2.0 | 4 votes |
@Test(timeout=10000) public void testFormat() throws IOException, InterruptedException { Job job = Job.getInstance(conf); Random random = new Random(); long seed = random.nextLong(); random.setSeed(seed); localFs.delete(workDir, true); FileInputFormat.setInputPaths(job, workDir); final int length = 10000; final int numFiles = 10; // create files with a variety of lengths createFiles(length, numFiles, random, job); TaskAttemptContext context = MapReduceTestUtil. createDummyMapTaskAttemptContext(job.getConfiguration()); // create a combine split for the files InputFormat<IntWritable,BytesWritable> format = new CombineSequenceFileInputFormat<IntWritable,BytesWritable>(); for (int i = 0; i < 3; i++) { int numSplits = random.nextInt(length/(SequenceFile.SYNC_INTERVAL/20)) + 1; LOG.info("splitting: requesting = " + numSplits); List<InputSplit> splits = format.getSplits(job); LOG.info("splitting: got = " + splits.size()); // we should have a single split as the length is comfortably smaller than // the block size assertEquals("We got more than one splits!", 1, splits.size()); InputSplit split = splits.get(0); assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass()); // check the split BitSet bits = new BitSet(length); RecordReader<IntWritable,BytesWritable> reader = format.createRecordReader(split, context); MapContext<IntWritable,BytesWritable,IntWritable,BytesWritable> mcontext = new MapContextImpl<IntWritable,BytesWritable,IntWritable,BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split); reader.initialize(split, mcontext); assertEquals("reader class is CombineFileRecordReader.", CombineFileRecordReader.class, reader.getClass()); try { while (reader.nextKeyValue()) { IntWritable key = reader.getCurrentKey(); BytesWritable value = reader.getCurrentValue(); assertNotNull("Value should not be null.", value); final int k = key.get(); LOG.debug("read " + k); 
assertFalse("Key in multiple partitions.", bits.get(k)); bits.set(k); } } finally { reader.close(); } assertEquals("Some keys in no partition.", length, bits.cardinality()); } }
Example 16
Source File: TestMRSequenceFileAsBinaryOutputFormat.java From big-c with Apache License 2.0 | 4 votes |
/**
 * Round-trips {@code RECORDS} random int/double pairs. The pairs are
 * serialized into byte buffers and written with
 * {@code SequenceFileAsBinaryOutputFormat}. They are then read back with
 * {@code SequenceFileInputFormat} and compared against the same random
 * sequence, regenerated from the saved seed.
 *
 * @throws IOException if writing, committing or reading fails
 * @throws InterruptedException if the writer or a reader is interrupted
 */
public void testBinary() throws IOException, InterruptedException {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);

  Path outdir = new Path(System.getProperty("test.build.data", "/tmp"),
      "outseq");
  Random rng = new Random();
  long seed = rng.nextLong();
  rng.setSeed(seed);

  FileOutputFormat.setOutputPath(job, outdir);

  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job,
      IntWritable.class);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job,
      DoubleWritable.class);

  SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job,
      CompressionType.BLOCK);

  BytesWritable rawKey = new BytesWritable();
  BytesWritable rawValue = new BytesWritable();

  TaskAttemptContext context =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(
          job.getConfiguration());
  OutputFormat<BytesWritable, BytesWritable> outputFormat =
      new SequenceFileAsBinaryOutputFormat();
  OutputCommitter committer = outputFormat.getOutputCommitter(context);
  committer.setupJob(job);
  RecordWriter<BytesWritable, BytesWritable> writer =
      outputFormat.getRecordWriter(context);

  DataOutputBuffer buffer = new DataOutputBuffer();
  LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
  try {
    for (int written = 0; written < RECORDS; ++written) {
      // Serialize the int key, hand its bytes to rawKey, then reuse buffer.
      IntWritable intKey = new IntWritable(rng.nextInt());
      intKey.write(buffer);
      rawKey.set(buffer.getData(), 0, buffer.getLength());
      buffer.reset();

      // Same for the double value.
      DoubleWritable doubleValue = new DoubleWritable(rng.nextDouble());
      doubleValue.write(buffer);
      rawValue.set(buffer.getData(), 0, buffer.getLength());
      buffer.reset();

      writer.write(rawKey, rawValue);
    }
  } finally {
    writer.close(context);
  }
  committer.commitTask(context);
  committer.commitJob(job);

  InputFormat<IntWritable, DoubleWritable> iformat =
      new SequenceFileInputFormat<IntWritable, DoubleWritable>();
  int count = 0;
  // Restart the generator so the expected values replay in write order.
  rng.setSeed(seed);
  SequenceFileInputFormat.setInputPaths(job, outdir);
  LOG.info("Reading data by SequenceFileInputFormat");
  for (InputSplit split : iformat.getSplits(job)) {
    RecordReader<IntWritable, DoubleWritable> reader =
        iformat.createRecordReader(split, context);
    MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable>
        mcontext =
        new MapContextImpl<IntWritable, DoubleWritable,
            BytesWritable, BytesWritable>(job.getConfiguration(),
            context.getTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    try {
      while (reader.nextKeyValue()) {
        int sourceInt = rng.nextInt();
        double sourceDouble = rng.nextDouble();
        IntWritable readKey = reader.getCurrentKey();
        DoubleWritable readValue = reader.getCurrentValue();
        assertEquals(
            "Keys don't match: " + "*" + readKey.get() + ":" +
                sourceInt + "*",
            sourceInt, readKey.get());
        assertTrue(
            "Vals don't match: " + "*" + readValue.get() + ":" +
                sourceDouble + "*",
            Double.compare(readValue.get(), sourceDouble) == 0);
        ++count;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, count);
}
Example 17
Source File: TestCombineSequenceFileInputFormat.java From big-c with Apache License 2.0 | 4 votes |
@Test(timeout=10000) public void testFormat() throws IOException, InterruptedException { Job job = Job.getInstance(conf); Random random = new Random(); long seed = random.nextLong(); random.setSeed(seed); localFs.delete(workDir, true); FileInputFormat.setInputPaths(job, workDir); final int length = 10000; final int numFiles = 10; // create files with a variety of lengths createFiles(length, numFiles, random, job); TaskAttemptContext context = MapReduceTestUtil. createDummyMapTaskAttemptContext(job.getConfiguration()); // create a combine split for the files InputFormat<IntWritable,BytesWritable> format = new CombineSequenceFileInputFormat<IntWritable,BytesWritable>(); for (int i = 0; i < 3; i++) { int numSplits = random.nextInt(length/(SequenceFile.SYNC_INTERVAL/20)) + 1; LOG.info("splitting: requesting = " + numSplits); List<InputSplit> splits = format.getSplits(job); LOG.info("splitting: got = " + splits.size()); // we should have a single split as the length is comfortably smaller than // the block size assertEquals("We got more than one splits!", 1, splits.size()); InputSplit split = splits.get(0); assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass()); // check the split BitSet bits = new BitSet(length); RecordReader<IntWritable,BytesWritable> reader = format.createRecordReader(split, context); MapContext<IntWritable,BytesWritable,IntWritable,BytesWritable> mcontext = new MapContextImpl<IntWritable,BytesWritable,IntWritable,BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split); reader.initialize(split, mcontext); assertEquals("reader class is CombineFileRecordReader.", CombineFileRecordReader.class, reader.getClass()); try { while (reader.nextKeyValue()) { IntWritable key = reader.getCurrentKey(); BytesWritable value = reader.getCurrentValue(); assertNotNull("Value should not be null.", value); final int k = key.get(); LOG.debug("read " + k); 
assertFalse("Key in multiple partitions.", bits.get(k)); bits.set(k); } } finally { reader.close(); } assertEquals("Some keys in no partition.", length, bits.cardinality()); } }
Example 18
Source File: TestMRKeyValueTextInputFormat.java From big-c with Apache License 2.0 | 4 votes |
@Test public void testSplitableCodecs() throws Exception { final Job job = Job.getInstance(defaultConf); final Configuration conf = job.getConfiguration(); // Create the codec CompressionCodec codec = null; try { codec = (CompressionCodec) ReflectionUtils.newInstance(conf.getClassByName("org.apache.hadoop.io.compress.BZip2Codec"), conf); } catch (ClassNotFoundException cnfe) { throw new IOException("Illegal codec!"); } Path file = new Path(workDir, "test"+codec.getDefaultExtension()); int seed = new Random().nextInt(); LOG.info("seed = " + seed); Random random = new Random(seed); localFs.delete(workDir, true); FileInputFormat.setInputPaths(job, workDir); final int MAX_LENGTH = 500000; FileInputFormat.setMaxInputSplitSize(job, MAX_LENGTH / 20); // for a variety of lengths for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 4) + 1) { LOG.info("creating; entries = " + length); // create a file with length entries Writer writer = new OutputStreamWriter(codec.createOutputStream(localFs.create(file))); try { for (int i = 0; i < length; i++) { writer.write(Integer.toString(i * 2)); writer.write("\t"); writer.write(Integer.toString(i)); writer.write("\n"); } } finally { writer.close(); } // try splitting the file in a variety of sizes KeyValueTextInputFormat format = new KeyValueTextInputFormat(); assertTrue("KVTIF claims not splittable", format.isSplitable(job, file)); for (int i = 0; i < 3; i++) { int numSplits = random.nextInt(MAX_LENGTH / 2000) + 1; LOG.info("splitting: requesting = " + numSplits); List<InputSplit> splits = format.getSplits(job); LOG.info("splitting: got = " + splits.size()); // check each split BitSet bits = new BitSet(length); for (int j = 0; j < splits.size(); j++) { LOG.debug("split["+j+"]= " + splits.get(j)); TaskAttemptContext context = MapReduceTestUtil. 
createDummyMapTaskAttemptContext(job.getConfiguration()); RecordReader<Text, Text> reader = format.createRecordReader( splits.get(j), context); Class<?> clazz = reader.getClass(); MapContext<Text, Text, Text, Text> mcontext = new MapContextImpl<Text, Text, Text, Text>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), splits.get(j)); reader.initialize(splits.get(j), mcontext); Text key = null; Text value = null; try { int count = 0; while (reader.nextKeyValue()) { key = reader.getCurrentKey(); value = reader.getCurrentValue(); final int k = Integer.parseInt(key.toString()); final int v = Integer.parseInt(value.toString()); assertEquals("Bad key", 0, k % 2); assertEquals("Mismatched key/value", k / 2, v); LOG.debug("read " + k + "," + v); assertFalse(k + "," + v + " in multiple partitions.",bits.get(v)); bits.set(v); count++; } if (count > 0) { LOG.info("splits["+j+"]="+splits.get(j)+" count=" + count); } else { LOG.debug("splits["+j+"]="+splits.get(j)+" count=" + count); } } finally { reader.close(); } } assertEquals("Some keys in no partition.", length, bits.cardinality()); } } }
Example 19
Source File: TestMRKeyValueTextInputFormat.java From hadoop with Apache License 2.0 | 4 votes |
@Test public void testFormat() throws Exception { Job job = Job.getInstance(new Configuration(defaultConf)); Path file = new Path(workDir, "test.txt"); int seed = new Random().nextInt(); LOG.info("seed = " + seed); Random random = new Random(seed); localFs.delete(workDir, true); FileInputFormat.setInputPaths(job, workDir); final int MAX_LENGTH = 10000; // for a variety of lengths for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) { LOG.debug("creating; entries = " + length); // create a file with length entries Writer writer = new OutputStreamWriter(localFs.create(file)); try { for (int i = 0; i < length; i++) { writer.write(Integer.toString(i * 2)); writer.write("\t"); writer.write(Integer.toString(i)); writer.write("\n"); } } finally { writer.close(); } // try splitting the file in a variety of sizes KeyValueTextInputFormat format = new KeyValueTextInputFormat(); for (int i = 0; i < 3; i++) { int numSplits = random.nextInt(MAX_LENGTH / 20) + 1; LOG.debug("splitting: requesting = " + numSplits); List<InputSplit> splits = format.getSplits(job); LOG.debug("splitting: got = " + splits.size()); // check each split BitSet bits = new BitSet(length); for (int j = 0; j < splits.size(); j++) { LOG.debug("split["+j+"]= " + splits.get(j)); TaskAttemptContext context = MapReduceTestUtil. 
createDummyMapTaskAttemptContext(job.getConfiguration()); RecordReader<Text, Text> reader = format.createRecordReader( splits.get(j), context); Class<?> clazz = reader.getClass(); assertEquals("reader class is KeyValueLineRecordReader.", KeyValueLineRecordReader.class, clazz); MapContext<Text, Text, Text, Text> mcontext = new MapContextImpl<Text, Text, Text, Text>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), splits.get(j)); reader.initialize(splits.get(j), mcontext); Text key = null; Text value = null; try { int count = 0; while (reader.nextKeyValue()) { key = reader.getCurrentKey(); clazz = key.getClass(); assertEquals("Key class is Text.", Text.class, clazz); value = reader.getCurrentValue(); clazz = value.getClass(); assertEquals("Value class is Text.", Text.class, clazz); final int k = Integer.parseInt(key.toString()); final int v = Integer.parseInt(value.toString()); assertEquals("Bad key", 0, k % 2); assertEquals("Mismatched key/value", k / 2, v); LOG.debug("read " + v); assertFalse("Key in multiple partitions.", bits.get(v)); bits.set(v); count++; } LOG.debug("splits[" + j + "]=" + splits.get(j) +" count=" + count); } finally { reader.close(); } } assertEquals("Some keys in no partition.", length, bits.cardinality()); } } }
Example 20
Source File: TestMRSequenceFileAsBinaryInputFormat.java From hadoop with Apache License 2.0 | 4 votes |
/**
 * Writes {@code RECORDS} random {@link Text} key/value pairs to a sequence
 * file and reads them back through {@link SequenceFileAsBinaryInputFormat}.
 *
 * <p>The random generator is re-seeded with the same seed before reading, so
 * the expected pairs are regenerated in write order. Each raw
 * {@link BytesWritable} key and value is deserialized into a {@link Text}
 * and compared with the regenerated pair; finally the number of records read
 * must equal {@code RECORDS}.
 *
 * @throws IOException if file system or record reader access fails
 * @throws InterruptedException if split computation or reading is interrupted
 */
public void testBinary() throws IOException, InterruptedException {
  final Job job = Job.getInstance();
  final FileSystem fs = FileSystem.getLocal(job.getConfiguration());
  final Path dir =
      new Path(System.getProperty("test.build.data", ".") + "/mapred");
  final Path file = new Path(dir, "testbinary.seq");

  final Random r = new Random();
  final long seed = r.nextLong();
  r.setSeed(seed);

  fs.delete(dir, true);
  FileInputFormat.setInputPaths(job, dir);

  // Holders reused for both writing and regenerating the expected pairs.
  final Text tkey = new Text();
  final Text tval = new Text();

  final SequenceFile.Writer writer = new SequenceFile.Writer(
      fs, job.getConfiguration(), file, Text.class, Text.class);
  try {
    int written = 0;
    while (written < RECORDS) {
      tkey.set(Integer.toString(r.nextInt(), 36));
      tval.set(Long.toString(r.nextLong(), 36));
      writer.append(tkey, tval);
      ++written;
    }
  } finally {
    writer.close();
  }

  final TaskAttemptContext context = MapReduceTestUtil
      .createDummyMapTaskAttemptContext(job.getConfiguration());
  final InputFormat<BytesWritable, BytesWritable> bformat =
      new SequenceFileAsBinaryInputFormat();

  // Restart the generator so the same pairs come out in the same order.
  r.setSeed(seed);

  final Text cmpkey = new Text();
  final Text cmpval = new Text();
  final DataInputBuffer buf = new DataInputBuffer();
  int count = 0;

  FileInputFormat.setInputPaths(job, file);
  final List<InputSplit> splits = bformat.getSplits(job);
  for (int s = 0; s < splits.size(); s++) {
    final InputSplit split = splits.get(s);
    final RecordReader<BytesWritable, BytesWritable> reader =
        bformat.createRecordReader(split, context);
    final MapContext<BytesWritable, BytesWritable, BytesWritable, BytesWritable>
        mcontext =
        new MapContextImpl<BytesWritable, BytesWritable, BytesWritable,
            BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(),
            reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    try {
      while (reader.nextKeyValue()) {
        final BytesWritable bkey = reader.getCurrentKey();
        final BytesWritable bval = reader.getCurrentValue();

        // Regenerate the pair that was written at this position.
        tkey.set(Integer.toString(r.nextInt(), 36));
        tval.set(Long.toString(r.nextLong(), 36));

        // Deserialize the raw bytes back into Text for comparison.
        buf.reset(bkey.getBytes(), bkey.getLength());
        cmpkey.readFields(buf);
        buf.reset(bval.getBytes(), bval.getLength());
        cmpval.readFields(buf);

        final String actualKey = cmpkey.toString();
        final String expectedKey = tkey.toString();
        assertTrue(
            "Keys don't match: " + "*" + actualKey + ":" + expectedKey + "*",
            actualKey.equals(expectedKey));

        final String actualVal = cmpval.toString();
        final String expectedVal = tval.toString();
        assertTrue(
            "Vals don't match: " + "*" + actualVal + ":" + expectedVal + "*",
            actualVal.equals(expectedVal));

        ++count;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, count);
}