org.apache.hadoop.mapreduce.OutputFormat Java Examples
The following examples show how to use
org.apache.hadoop.mapreduce.OutputFormat.
Each example is taken from an open-source project; the source file, project, and license are noted above the code.
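Before the individual examples, a minimal sketch may help orient readers: org.apache.hadoop.mapreduce.OutputFormat is the abstract class behind a job's output path, and every implementation supplies three methods: checkOutputSpecs validates the job configuration, getRecordWriter produces the writer that receives output records, and getOutputCommitter controls task and job commit. The class below is illustrative only (NoOpOutputFormat is a made-up name, not taken from any of the projects listed here); it discards records but compiles against the standard mapreduce API.

import java.io.IOException;

import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

// Illustrative only: discards every record, but shows the three abstract
// methods every OutputFormat implementation has to supply.
public class NoOpOutputFormat<K, V> extends OutputFormat<K, V> {

  @Override
  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
      throws IOException, InterruptedException {
    return new RecordWriter<K, V>() {
      @Override
      public void write(K key, V value) {
        // a real implementation would persist the key/value pair here
      }

      @Override
      public void close(TaskAttemptContext context) {
        // release any resources opened in getRecordWriter()
      }
    };
  }

  @Override
  public void checkOutputSpecs(JobContext context) {
    // a real implementation would fail fast here, e.g. if the output
    // directory already exists or required configuration is missing
  }

  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
      throws IOException, InterruptedException {
    // borrow the committer of NullOutputFormat, which commits nothing
    return new NullOutputFormat<K, V>().getOutputCommitter(context);
  }
}

A job would then select it with job.setOutputFormatClass(NoOpOutputFormat.class), the same call several of the examples below use to plug in their own formats.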
Example #1
Source File: MultiMROutput.java From tez with Apache License 2.0
@SuppressWarnings("unchecked")
private synchronized RecordWriter getNewRecordWriter(
    TaskAttemptContext taskContext, String baseFileName)
    throws IOException, InterruptedException {
  // look for record-writer in the cache
  RecordWriter writer = newRecordWriters.get(baseFileName);

  // If not in cache, create a new one
  if (writer == null) {
    // get the record writer from context output format
    taskContext.getConfiguration().set(
        MRJobConfig.FILEOUTPUTFORMAT_BASE_OUTPUT_NAME, baseFileName);
    try {
      writer = ((OutputFormat) ReflectionUtils.newInstance(
          taskContext.getOutputFormatClass(), taskContext.getConfiguration()))
          .getRecordWriter(taskContext);
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
    // add the record-writer to the cache
    newRecordWriters.put(baseFileName, writer);
  }
  return writer;
}
Example #2
Source File: TestJobOutputCommitter.java From big-c with Apache License 2.0
private void testFailedJob(String fileName,
    Class<? extends OutputFormat> output, String[] exclude) throws Exception {
  Path outDir = getNewOutputDir();
  Job job = MapReduceTestUtil.createFailJob(conf, outDir, inDir);
  job.setOutputFormatClass(output);

  assertFalse("Job did not fail!", job.waitForCompletion(true));

  if (fileName != null) {
    Path testFile = new Path(outDir, fileName);
    assertTrue("File " + testFile + " missing for failed job "
        + job.getJobID(), fs.exists(testFile));
  }

  // check if the files from the missing set exists
  for (String ex : exclude) {
    Path file = new Path(outDir, ex);
    assertFalse("File " + file + " should not be present for failed job "
        + job.getJobID(), fs.exists(file));
  }
}
Example #3
Source File: MRJobTestUtil.java From sqoop-on-spark with Apache License 2.0
@SuppressWarnings("deprecation")
public static boolean runJob(Configuration conf,
    Class<? extends InputFormat<?,?>> inputFormatClass,
    Class<? extends Mapper<?,?,?,?>> mapperClass,
    Class<? extends OutputFormat<?,?>> outputFormatClass)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(conf);

  job.setInputFormatClass(inputFormatClass);
  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setOutputFormatClass(outputFormatClass);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);

  boolean ret = job.waitForCompletion(true);

  // Hadoop 1.0 (and 0.20) have a nasty bug where the job committer is not
  // called in LocalJobRunner
  if (isHadoop1()) {
    callOutputCommitter(job, outputFormatClass);
  }

  return ret;
}
Example #4
Source File: AvroStorage.java From Cubert with Apache License 2.0
@SuppressWarnings("rawtypes")
@Override
public OutputFormat getOutputFormat() throws IOException {
  AvroStorageLog.funcCall("getOutputFormat");

  Properties property = getUDFProperties();
  String allSchemaStr = property.getProperty(AVRO_OUTPUT_SCHEMA_PROPERTY);
  Map<String, String> map = (allSchemaStr != null)
      ? parseSchemaMap(allSchemaStr) : null;

  String key = getSchemaKey();
  Schema schema = (map == null || !map.containsKey(key))
      ? outputAvroSchema : Schema.parse(map.get(key));

  if (schema == null)
    throw new IOException("Output schema is null!");

  AvroStorageLog.details("Output schema=" + schema);
  return new PigAvroOutputFormat(schema);
}
Example #5
Source File: AccumuloMrsPyramidOutputFormatProvider.java From mrgeo with Apache License 2.0
@Override
public OutputFormat getOutputFormat() {
  // TODO Auto-generated method stub
  if (doBulk || forceBulk) {
    return new AccumuloMrsPyramidFileOutputFormat(zoomLevel, cv);
    //return new AccumuloMrsPyramidFileOutputFormat();
  } else {
    return new AccumuloMrsPyramidOutputFormat(zoomLevel, cv);
  }
}
Example #6
Source File: MultithreadedMapper.java From marklogic-contentpump with Apache License 2.0
MapRunner() throws IOException, InterruptedException, ClassNotFoundException {
  // initiate the real mapper that does the work
  mapper = ReflectionUtils.newInstance(mapClass, outer.getConfiguration());
  @SuppressWarnings("unchecked")
  OutputFormat<K2, V2> outputFormat = (OutputFormat<K2, V2>)
      ReflectionUtils.newInstance(outer.getOutputFormatClass(),
          outer.getConfiguration());
  try {
    // outputFormat is not initialized. Relying on everything it
    // needs can be obtained from the AssignmentManager singleton.
    writer = outputFormat.getRecordWriter(outer);
    subcontext = (Context) ReflectionUtil.createMapperContext(
        mapper, outer.getConfiguration(), outer.getTaskAttemptID(),
        new SubMapRecordReader(), writer, outer.getOutputCommitter(),
        new SubMapStatusReporter(), outer.getInputSplit());
  } catch (Exception e) {
    throw new IOException("Error creating mapper context", e);
  }
}
Example #7
Source File: MapReducePOStoreImpl.java From spork with Apache License 2.0
@Override
public StoreFuncInterface createStoreFunc(POStore store) throws IOException {
  StoreFuncInterface storeFunc = store.getStoreFunc();

  // call the setStoreLocation on the storeFunc giving it the
  // Job. Typically this will result in the OutputFormat of the
  // storeFunc storing the output location in the Configuration
  // in the Job. The PigOutFormat.setLocation() method will merge
  // this modified Configuration into the configuration of the
  // Context we have
  PigOutputFormat.setLocation(context, store);
  OutputFormat<?,?> outputFormat = storeFunc.getOutputFormat();

  // create a new record writer
  try {
    writer = outputFormat.getRecordWriter(context);
  } catch (InterruptedException e) {
    throw new IOException(e);
  }

  storeFunc.prepareToWrite(writer);
  return storeFunc;
}
Example #8
Source File: HadoopFormatIOWriteTest.java From beam with Apache License 2.0
/**
 * This test validates functionality of {@link
 * HadoopFormatIO.Write.Builder#withConfiguration(Configuration) withConfiguration(Configuration)}
 * function when job id is not provided by the user in configuration.
 */
@Test
public void testWriteValidationFailsMissingJobIDInConf() {
  Configuration configuration = new Configuration();
  configuration.setClass(
      HadoopFormatIO.OUTPUT_FORMAT_CLASS_ATTR, TextOutputFormat.class, OutputFormat.class);
  configuration.setClass(HadoopFormatIO.OUTPUT_KEY_CLASS, Text.class, Object.class);
  configuration.setClass(HadoopFormatIO.OUTPUT_VALUE_CLASS, Employee.class, Object.class);
  configuration.set(HadoopFormatIO.OUTPUT_DIR, tmpFolder.getRoot().getAbsolutePath());

  runValidationPipeline(configuration);

  thrown.expect(Pipeline.PipelineExecutionException.class);
  thrown.expectMessage("Configuration must contain \"mapreduce.job.id\"");

  p.run().waitUntilFinish();
}
Example #9
Source File: TestJobOutputCommitter.java From hadoop with Apache License 2.0
private void testFailedJob(String fileName,
    Class<? extends OutputFormat> output, String[] exclude) throws Exception {
  Path outDir = getNewOutputDir();
  Job job = MapReduceTestUtil.createFailJob(conf, outDir, inDir);
  job.setOutputFormatClass(output);

  assertFalse("Job did not fail!", job.waitForCompletion(true));

  if (fileName != null) {
    Path testFile = new Path(outDir, fileName);
    assertTrue("File " + testFile + " missing for failed job "
        + job.getJobID(), fs.exists(testFile));
  }

  // check if the files from the missing set exists
  for (String ex : exclude) {
    Path file = new Path(outDir, ex);
    assertFalse("File " + file + " should not be present for failed job "
        + job.getJobID(), fs.exists(file));
  }
}
Example #10
Source File: MneMapreduceLongDataTest.java From mnemonic with Apache License 2.0
@Test(enabled = true)
public void testWriteLongData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<Long> sess =
      new MneDurableOutputSession<Long>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<Long> mdvalue =
      new MneDurableOutputValue<Long>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<Long>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<Long>>();
  RecordWriter<NullWritable, MneDurableOutputValue<Long>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  Long val = null;
  for (int i = 0; i < m_reccnt; ++i) {
    val = m_rand.nextLong();
    m_sum += val;
    writer.write(nada, mdvalue.of(val));
  }
  writer.close(m_tacontext);
  sess.close();
}
Example #11
Source File: HadoopFormatIO.java From beam with Apache License 2.0
private static OutputCommitter initOutputCommitter(
    OutputFormat<?, ?> outputFormatObj,
    Configuration conf,
    TaskAttemptContext taskAttemptContext)
    throws IllegalStateException {
  OutputCommitter outputCommitter;
  try {
    outputCommitter = outputFormatObj.getOutputCommitter(taskAttemptContext);
    if (outputCommitter != null) {
      outputCommitter.setupJob(new JobContextImpl(conf, taskAttemptContext.getJobID()));
    }
  } catch (Exception e) {
    throw new IllegalStateException("Unable to create OutputCommitter object: ", e);
  }
  return outputCommitter;
}
Example #12
Source File: MneMapreduceChunkDataTest.java From mnemonic with Apache License 2.0
@Test(enabled = true)
public void testWriteChunkData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableChunk<?>> sess =
      new MneDurableOutputSession<DurableChunk<?>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableChunk<?>> mdvalue =
      new MneDurableOutputValue<DurableChunk<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableChunk<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableChunk<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableChunk<?>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  DurableChunk<?> dchunk = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dchunk = genupdDurableChunk(sess, cs);
    Assert.assertNotNull(dchunk);
    writer.write(nada, mdvalue.of(dchunk));
  }
  m_checksum = cs.getValue();
  writer.close(m_tacontext);
  sess.close();
}
Example #13
Source File: MneMapreduceBufferDataTest.java From mnemonic with Apache License 2.0
@Test(enabled = true)
public void testWriteBufferData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableBuffer<?>> sess =
      new MneDurableOutputSession<DurableBuffer<?>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableBuffer<?>> mdvalue =
      new MneDurableOutputValue<DurableBuffer<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableBuffer<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  DurableBuffer<?> dbuf = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dbuf = genupdDurableBuffer(sess, cs);
    Assert.assertNotNull(dbuf);
    writer.write(nada, mdvalue.of(dbuf));
  }
  m_checksum = cs.getValue();
  writer.close(m_tacontext);
  sess.close();
}
Example #14
Source File: NetezzaExternalTableImportJob.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected Class<? extends OutputFormat> getOutputFormatClass()
    throws ClassNotFoundException {
  if (isHCatJob) {
    return SqoopHCatUtilities.getOutputFormatClass();
  } else {
    return RawKeyTextOutputFormat.class;
  }
}
Example #15
Source File: TestRecovery.java From hadoop with Apache License 2.0
private void writeBadOutput(TaskAttempt attempt, Configuration conf)
    throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf,
      TypeConverter.fromYarn(attempt.getID()));

  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat
      .getRecordWriter(tContext);

  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key2, val2);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val2);
    theRecordWriter.write(nullWritable, val1);
    theRecordWriter.write(key1, nullWritable);
    theRecordWriter.write(key2, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key1, val1);
  } finally {
    theRecordWriter.close(tContext);
  }

  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
Example #16
Source File: LazyOutputFormat.java From big-c with Apache License 2.0
/**
 * Set the underlying output format for LazyOutputFormat.
 * @param job the {@link Job} to modify
 * @param theClass the underlying class
 */
@SuppressWarnings("unchecked")
public static void setOutputFormatClass(Job job,
    Class<? extends OutputFormat> theClass) {
  job.setOutputFormatClass(LazyOutputFormat.class);
  job.getConfiguration().setClass(OUTPUT_FORMAT, theClass, OutputFormat.class);
}
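For context, here is a hedged usage sketch of this setter (the driver class name, output key/value types, and output path argument are assumptions for illustration, not part of the LazyOutputFormat source): a driver wraps its real output format so that part files are only created when a record is actually written.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Hypothetical driver: wrap TextOutputFormat in LazyOutputFormat so that
// tasks which emit no records leave no empty part files behind.
public class LazyOutputDriver {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "lazy-output-example");
    job.setJarByClass(LazyOutputDriver.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Register LazyOutputFormat as the job's format and record the real
    // format under the OUTPUT_FORMAT configuration key.
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(args[0]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Example #17 below shows the matching internal step, where LazyOutputFormat lazily instantiates the wrapped format from that same OUTPUT_FORMAT configuration key.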
Example #17
Source File: LazyOutputFormat.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked")
private void getBaseOutputFormat(Configuration conf) throws IOException {
  baseOut = ((OutputFormat<K, V>) ReflectionUtils.newInstance(
      conf.getClass(OUTPUT_FORMAT, null), conf));
  if (baseOut == null) {
    throw new IOException("Output Format not set for LazyOutputFormat");
  }
}
Example #18
Source File: HadoopFormatIOWriteTest.java From beam with Apache License 2.0
private static Configuration loadTestConfiguration(
    Class<?> outputFormatClassName, Class<?> keyClass, Class<?> valueClass) {
  Configuration conf = new Configuration();
  conf.setClass(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatClassName, OutputFormat.class);
  conf.setClass(MRJobConfig.OUTPUT_KEY_CLASS, keyClass, Object.class);
  conf.setClass(MRJobConfig.OUTPUT_VALUE_CLASS, valueClass, Object.class);
  conf.setInt(MRJobConfig.NUM_REDUCES, REDUCERS_COUNT);
  conf.set(MRJobConfig.ID, String.valueOf(1));
  return conf;
}
Example #19
Source File: HadoopOutputFormatTest.java From flink with Apache License 2.0
private HadoopOutputFormat<String, Long> setupHadoopOutputFormat(
    OutputFormat<String, Long> outputFormat,
    Job job,
    RecordWriter<String, Long> recordWriter,
    OutputCommitter outputCommitter,
    Configuration configuration) {

  HadoopOutputFormat<String, Long> hadoopOutputFormat =
      new HadoopOutputFormat<>(outputFormat, job);
  hadoopOutputFormat.recordWriter = recordWriter;
  hadoopOutputFormat.outputCommitter = outputCommitter;
  hadoopOutputFormat.configuration = configuration;
  hadoopOutputFormat.configuration.set(MAPRED_OUTPUT_DIR_KEY, MAPRED_OUTPUT_PATH);

  return hadoopOutputFormat;
}
Example #20
Source File: RecordReaderWriterTest.java From tinkerpop with Apache License 2.0
@Test
public void shouldSplitFileAndWriteProperSplits() throws Exception {
  for (int numberOfSplits = 1; numberOfSplits < 10; numberOfSplits++) {
    final File testFile = new File(HadoopGraphProvider.PATHS.get(getInputFilename()));
    logger.info("Testing: {}", testFile + " (splits {}", numberOfSplits + ")");
    final List<FileSplit> splits = generateFileSplits(testFile, numberOfSplits);
    final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass = getInputFormat();
    final Class<? extends OutputFormat<NullWritable, VertexWritable>> outputFormatClass = getOutputFormat();
    final File outputDirectory = TestHelper.makeTestDataPath(inputFormatClass, "hadoop-record-reader-writer-test");
    final Configuration config = configure(outputDirectory);
    config.addResource(this.configuration);
    validateFileSplits(splits, config, inputFormatClass, Optional.of(outputFormatClass));
  }
}
Example #21
Source File: TestJobOutputCommitter.java From hadoop with Apache License 2.0
private void testKilledJob(String fileName,
    Class<? extends OutputFormat> output, String[] exclude) throws Exception {
  Path outDir = getNewOutputDir();
  Job job = MapReduceTestUtil.createKillJob(conf, outDir, inDir);
  job.setOutputFormatClass(output);

  job.submit();

  // wait for the setup to be completed
  while (job.setupProgress() != 1.0f) {
    UtilsForTests.waitFor(100);
  }

  job.killJob(); // kill the job

  assertFalse("Job did not get kill", job.waitForCompletion(true));

  if (fileName != null) {
    Path testFile = new Path(outDir, fileName);
    assertTrue("File " + testFile + " missing for job " + job.getJobID(),
        fs.exists(testFile));
  }

  // check if the files from the missing set exists
  for (String ex : exclude) {
    Path file = new Path(outDir, ex);
    assertFalse("File " + file + " should not be present for killed job "
        + job.getJobID(), fs.exists(file));
  }
}
Example #22
Source File: JobContextImpl.java From incubator-tez with Apache License 2.0
/**
 * Get the {@link OutputFormat} class for the job.
 *
 * @return the {@link OutputFormat} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends OutputFormat<?,?>> getOutputFormatClass()
    throws ClassNotFoundException {
  return (Class<? extends OutputFormat<?,?>>)
      conf.getClass(OUTPUT_FORMAT_CLASS_ATTR, TextOutputFormat.class);
}
Example #23
Source File: HadoopV2Task.java From ignite with Apache License 2.0
/**
 * Setup task.
 *
 * @param outputFormat Output format.
 * @throws IOException In case of IO exception.
 * @throws InterruptedException In case of interrupt.
 */
protected void setup(@Nullable OutputFormat outputFormat)
    throws IOException, InterruptedException {
  if (hadoopCtx.writer() != null) {
    assert outputFormat != null;

    outputFormat.getOutputCommitter(hadoopCtx).setupTask(hadoopCtx);
  }
}
Example #24
Source File: JobBase.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public JobBase(final SqoopOptions opts,
    final Class<? extends Mapper> mapperClass,
    final Class<? extends InputFormat> inputFormatClass,
    final Class<? extends OutputFormat> outputFormatClass) {

  this.options = opts;
  this.mapperClass = mapperClass;
  this.inputFormatClass = inputFormatClass;
  this.outputFormatClass = outputFormatClass;
  isHCatJob = options.getHCatTableName() != null;
}
Example #25
Source File: ImportJobBase.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public ImportJobBase(final SqoopOptions opts,
    final Class<? extends Mapper> mapperClass,
    final Class<? extends InputFormat> inputFormatClass,
    final Class<? extends OutputFormat> outputFormatClass,
    final ImportJobContext context) {
  super(opts, mapperClass, inputFormatClass, outputFormatClass, context);
}
Example #26
Source File: TransformBaseRunner.java From BigDataPlatform with GNU General Public License v3.0
public void setupRunner(String jobName, Class<?> runnerClass,
    Class<? extends TableMapper<?, ?>> mapperClass,
    Class<? extends Reducer<?, ?, ?, ?>> reducerClass,
    Class<? extends WritableComparable<?>> outputKeyClass,
    Class<? extends Writable> outputValueClass,
    Class<? extends OutputFormat<?, ?>> outputFormatClass) {
  this.setupRunner(jobName, runnerClass, mapperClass, reducerClass,
      outputKeyClass, outputValueClass, outputKeyClass, outputValueClass,
      outputFormatClass);
}
Example #27
Source File: TestRecovery.java From big-c with Apache License 2.0
private void writeOutput(TaskAttempt attempt, Configuration conf)
    throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf,
      TypeConverter.fromYarn(attempt.getID()));

  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat
      .getRecordWriter(tContext);

  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(tContext);
  }

  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
Example #28
Source File: MultiOutputFormat.java From elasticsearch-hadoop with Apache License 2.0
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
  List<org.apache.hadoop.mapred.OutputFormat> formats = getOldApiFormats(job);
  for (org.apache.hadoop.mapred.OutputFormat format : formats) {
    format.checkOutputSpecs(ignored, job);
  }
}
Example #29
Source File: TestRecovery.java From big-c with Apache License 2.0
private void writeBadOutput(TaskAttempt attempt, Configuration conf)
    throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf,
      TypeConverter.fromYarn(attempt.getID()));

  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat
      .getRecordWriter(tContext);

  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key2, val2);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val2);
    theRecordWriter.write(nullWritable, val1);
    theRecordWriter.write(key1, nullWritable);
    theRecordWriter.write(key2, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key1, val1);
  } finally {
    theRecordWriter.close(tContext);
  }

  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
Example #30
Source File: InjectableConnManager.java From aliyun-maxcompute-data-collectors with Apache License 2.0
/**
 * Allow the user to inject custom mapper, input, and output formats
 * into the importTable() process.
 */
@Override
@SuppressWarnings("unchecked")
public void importTable(ImportJobContext context)
    throws IOException, ImportException {
  SqoopOptions options = context.getOptions();
  Configuration conf = options.getConf();

  Class<? extends Mapper> mapperClass = (Class<? extends Mapper>)
      conf.getClass(MAPPER_KEY, Mapper.class);
  Class<? extends InputFormat> ifClass = (Class<? extends InputFormat>)
      conf.getClass(INPUT_FORMAT_KEY, TextInputFormat.class);
  Class<? extends OutputFormat> ofClass = (Class<? extends OutputFormat>)
      conf.getClass(OUTPUT_FORMAT_KEY, TextOutputFormat.class);

  Class<? extends ImportJobBase> jobClass = (Class<? extends ImportJobBase>)
      conf.getClass(IMPORT_JOB_KEY, ImportJobBase.class);

  String tableName = context.getTableName();

  // Instantiate the user's chosen ImportJobBase instance.
  ImportJobBase importJob = ReflectionUtils.newInstance(jobClass, conf);

  // And configure the dependencies to inject
  importJob.setOptions(options);
  importJob.setMapperClass(mapperClass);
  importJob.setInputFormatClass(ifClass);
  importJob.setOutputFormatClass(ofClass);

  importJob.runImport(tableName, context.getJarFile(),
      getSplitColumn(options, tableName), conf);
}