org.apache.hadoop.mapreduce.OutputFormat Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.OutputFormat. Each example is taken from an open-source project; the source file and project are noted in the header above it.
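Before the examples, here is a minimal sketch of a custom OutputFormat, assuming only the org.apache.hadoop.mapreduce API: getRecordWriter, checkOutputSpecs, and getOutputCommitter are the three methods every subclass must implement. The class name DiscardingOutputFormat is hypothetical; it simply drops every record, similar in spirit to Hadoop's NullOutputFormat.

import java.io.IOException;

import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical illustration: an OutputFormat that discards all records.
public class DiscardingOutputFormat<K, V> extends OutputFormat<K, V> {

  @Override
  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
      throws IOException, InterruptedException {
    return new RecordWriter<K, V>() {
      @Override
      public void write(K key, V value) {
        // drop the record
      }
      @Override
      public void close(TaskAttemptContext ctx) {
      }
    };
  }

  @Override
  public void checkOutputSpecs(JobContext context) {
    // nothing to validate: there is no output location
  }

  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context) {
    // a committer whose every phase is a no-op
    return new OutputCommitter() {
      @Override public void setupJob(JobContext jobContext) { }
      @Override public void setupTask(TaskAttemptContext taskContext) { }
      @Override public boolean needsTaskCommit(TaskAttemptContext taskContext) { return false; }
      @Override public void commitTask(TaskAttemptContext taskContext) { }
      @Override public void abortTask(TaskAttemptContext taskContext) { }
    };
  }
}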
Example #1
Source File: MultiMROutput.java    From tez with Apache License 2.0
@SuppressWarnings("unchecked")
private synchronized RecordWriter getNewRecordWriter(
    TaskAttemptContext taskContext, String baseFileName)
    throws IOException, InterruptedException {

  // look for record-writer in the cache
  RecordWriter writer = newRecordWriters.get(baseFileName);

  // If not in cache, create a new one
  if (writer == null) {
    // get the record writer from context output format
    taskContext.getConfiguration().set(
        MRJobConfig.FILEOUTPUTFORMAT_BASE_OUTPUT_NAME, baseFileName);
    try {
      writer = ((OutputFormat) ReflectionUtils.newInstance(
          taskContext.getOutputFormatClass(), taskContext.getConfiguration()))
          .getRecordWriter(taskContext);
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
    // add the record-writer to the cache
    newRecordWriters.put(baseFileName, writer);
  }
  return writer;
}
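The cache above is keyed by base file name because MRJobConfig.FILEOUTPUTFORMAT_BASE_OUTPUT_NAME ("mapreduce.output.basename") controls the prefix of the part files a FileOutputFormat subclass produces. A hedged driver-side one-liner showing the same knob, assuming a FileOutputFormat-based job:

// Output files become data-r-00000 instead of the default part-r-00000.
job.getConfiguration().set(MRJobConfig.FILEOUTPUTFORMAT_BASE_OUTPUT_NAME, "data");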
 
Example #2
Source File: TestJobOutputCommitter.java    From big-c with Apache License 2.0
private void testFailedJob(String fileName,
    Class<? extends OutputFormat> output, String[] exclude) throws Exception {
  Path outDir = getNewOutputDir();
  Job job = MapReduceTestUtil.createFailJob(conf, outDir, inDir);
  job.setOutputFormatClass(output);

  assertFalse("Job did not fail!", job.waitForCompletion(true));

  if (fileName != null) {
    Path testFile = new Path(outDir, fileName);
    assertTrue("File " + testFile + " missing for failed job " + job.getJobID(),
        fs.exists(testFile));
  }

  // check that the files in the exclude set are absent
  for (String ex : exclude) {
    Path file = new Path(outDir, ex);
    assertFalse("File " + file + " should not be present for failed job "
        + job.getJobID(), fs.exists(file));
  }
}
 
Example #3
Source File: MRJobTestUtil.java    From sqoop-on-spark with Apache License 2.0
@SuppressWarnings("deprecation")
public static boolean runJob(Configuration conf,
    Class<? extends InputFormat<?,?>> inputFormatClass,
    Class<? extends Mapper<?,?,?,?>> mapperClass,
    Class<? extends OutputFormat<?,?>> outputFormatClass) throws IOException,
    InterruptedException, ClassNotFoundException {
  Job job = new Job(conf);
  job.setInputFormatClass(inputFormatClass);
  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setOutputFormatClass(outputFormatClass);
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);

  boolean ret = job.waitForCompletion(true);

  // Hadoop 1.0 (and 0.20) have a nasty bug where the job committer is not
  // called in LocalJobRunner
  if (isHadoop1()) {
    callOutputCommitter(job, outputFormatClass);
  }

  return ret;
}
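A hedged invocation sketch for this helper: TextInputFormat and TextOutputFormat are standard Hadoop classes, while WhitespaceTokenMapper is a made-up name standing in for whatever Mapper the test suite supplies.

Configuration conf = new Configuration();
boolean succeeded = MRJobTestUtil.runJob(conf,
    TextInputFormat.class,        // org.apache.hadoop.mapreduce.lib.input
    WhitespaceTokenMapper.class,  // hypothetical Mapper emitting Text/NullWritable
    TextOutputFormat.class);      // org.apache.hadoop.mapreduce.lib.output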
 
Example #4
Source File: AvroStorage.java    From Cubert with Apache License 2.0
@SuppressWarnings("rawtypes")
@Override
public OutputFormat getOutputFormat() throws IOException {
    AvroStorageLog.funcCall("getOutputFormat");

    Properties property = getUDFProperties();
    String allSchemaStr = property.getProperty(AVRO_OUTPUT_SCHEMA_PROPERTY);
    Map<String, String> map = (allSchemaStr != null) ? parseSchemaMap(allSchemaStr) : null;

    String key = getSchemaKey();
    Schema schema = (map == null || !map.containsKey(key)) ? outputAvroSchema : Schema.parse(map.get(key));

    if (schema == null)
        throw new IOException("Output schema is null!");
    AvroStorageLog.details("Output schema=" + schema);

    return new PigAvroOutputFormat(schema);
}
 
Example #5
Source File: AccumuloMrsPyramidOutputFormatProvider.java    From mrgeo with Apache License 2.0
@Override
public OutputFormat getOutputFormat()
{
  if (doBulk || forceBulk)
  {
    return new AccumuloMrsPyramidFileOutputFormat(zoomLevel, cv);
  }
  else
  {
    return new AccumuloMrsPyramidOutputFormat(zoomLevel, cv);
  }
}
 
Example #6
Source File: MultithreadedMapper.java    From marklogic-contentpump with Apache License 2.0
MapRunner() throws IOException, InterruptedException,
        ClassNotFoundException {
    // instantiate the real mapper that does the work
    mapper = ReflectionUtils.newInstance(mapClass,
        outer.getConfiguration());
    @SuppressWarnings("unchecked")
    OutputFormat<K2, V2> outputFormat = (OutputFormat<K2, V2>)
        ReflectionUtils.newInstance(outer.getOutputFormatClass(),
            outer.getConfiguration());
    try {
        // outputFormat is not initialized here; everything it needs
        // can be obtained from the AssignmentManager singleton.
        writer = outputFormat.getRecordWriter(outer);
        subcontext = (Context) ReflectionUtil.createMapperContext(
            mapper, outer.getConfiguration(), outer.getTaskAttemptID(),
            new SubMapRecordReader(), writer,
            outer.getOutputCommitter(), new SubMapStatusReporter(),
            outer.getInputSplit());
    } catch (Exception e) {
        throw new IOException("Error creating mapper context", e);
    }
}
 
Example #7
Source File: MapReducePOStoreImpl.java    From spork with Apache License 2.0
@Override
public StoreFuncInterface createStoreFunc(POStore store)
        throws IOException {

    StoreFuncInterface storeFunc = store.getStoreFunc();

    // call the setStoreLocation on the storeFunc giving it the
    // Job. Typically this will result in the OutputFormat of the
    // storeFunc storing the output location in the Configuration
    // in the Job. The PigOutFormat.setLocation() method will merge
    // this modified Configuration into the configuration of the
    // Context we have
    PigOutputFormat.setLocation(context, store);
    OutputFormat<?,?> outputFormat = storeFunc.getOutputFormat();

    // create a new record writer
    try {
        writer = outputFormat.getRecordWriter(context);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }

    storeFunc.prepareToWrite(writer);

    return storeFunc;
}
 
Example #8
Source File: HadoopFormatIOWriteTest.java    From beam with Apache License 2.0
/**
 * This test validates functionality of {@link
 * HadoopFormatIO.Write.Builder#withConfiguration(Configuration) withConfiguration(Configuration)}
 * function when job id is not provided by the user in configuration.
 */
@Test
public void testWriteValidationFailsMissingJobIDInConf() {
  Configuration configuration = new Configuration();
  configuration.setClass(
      HadoopFormatIO.OUTPUT_FORMAT_CLASS_ATTR, TextOutputFormat.class, OutputFormat.class);
  configuration.setClass(HadoopFormatIO.OUTPUT_KEY_CLASS, Text.class, Object.class);
  configuration.setClass(HadoopFormatIO.OUTPUT_VALUE_CLASS, Employee.class, Object.class);
  configuration.set(HadoopFormatIO.OUTPUT_DIR, tmpFolder.getRoot().getAbsolutePath());

  runValidationPipeline(configuration);

  thrown.expect(Pipeline.PipelineExecutionException.class);
  thrown.expectMessage("Configuration must contain \"mapreduce.job.id\"");

  p.run().waitUntilFinish();
}
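The asserted failure disappears once the configuration carries a job id. Example #18 below does exactly that via MRJobConfig.ID ("mapreduce.job.id"); a hedged one-liner:

configuration.set(MRJobConfig.ID, String.valueOf(1));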
 
Example #9
Source File: TestJobOutputCommitter.java    From hadoop with Apache License 2.0
private void testFailedJob(String fileName,
    Class<? extends OutputFormat> output, String[] exclude) throws Exception {
  Path outDir = getNewOutputDir();
  Job job = MapReduceTestUtil.createFailJob(conf, outDir, inDir);
  job.setOutputFormatClass(output);

  assertFalse("Job did not fail!", job.waitForCompletion(true));

  if (fileName != null) {
    Path testFile = new Path(outDir, fileName);
    assertTrue("File " + testFile + " missing for failed job " + job.getJobID(),
        fs.exists(testFile));
  }

  // check that the files in the exclude set are absent
  for (String ex : exclude) {
    Path file = new Path(outDir, ex);
    assertFalse("File " + file + " should not be present for failed job "
        + job.getJobID(), fs.exists(file));
  }
}
 
Example #10
Source File: MneMapreduceLongDataTest.java    From mnemonic with Apache License 2.0
@Test(enabled = true)
public void testWriteLongData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<Long> sess =
      new MneDurableOutputSession<Long>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<Long> mdvalue =
      new MneDurableOutputValue<Long>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<Long>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<Long>>();
  RecordWriter<NullWritable, MneDurableOutputValue<Long>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  Long val = null;
  for (int i = 0; i < m_reccnt; ++i) {
    val = m_rand.nextLong();
    m_sum += val;
    writer.write(nada, mdvalue.of(val));
  }
  writer.close(m_tacontext);
  sess.close();
}
 
Example #11
Source File: HadoopFormatIO.java    From beam with Apache License 2.0
private static OutputCommitter initOutputCommitter(
    OutputFormat<?, ?> outputFormatObj,
    Configuration conf,
    TaskAttemptContext taskAttemptContext)
    throws IllegalStateException {
  OutputCommitter outputCommitter;
  try {
    outputCommitter = outputFormatObj.getOutputCommitter(taskAttemptContext);
    if (outputCommitter != null) {
      outputCommitter.setupJob(new JobContextImpl(conf, taskAttemptContext.getJobID()));
    }
  } catch (Exception e) {
    throw new IllegalStateException("Unable to create OutputCommitter object", e);
  }

  return outputCommitter;
}
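setupJob has a mirror-image finalization step. A hedged sketch of the counterpart call, reusing the names from the method above, to be issued after all record writers are closed:

// commitJob finalizes output for the whole job, mirroring setupJob.
outputCommitter.commitJob(new JobContextImpl(conf, taskAttemptContext.getJobID()));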
 
Example #12
Source File: MneMapreduceChunkDataTest.java    From mnemonic with Apache License 2.0
@Test(enabled = true)
public void testWriteChunkData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableChunk<?>> sess =
      new MneDurableOutputSession<DurableChunk<?>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableChunk<?>> mdvalue =
      new MneDurableOutputValue<DurableChunk<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableChunk<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableChunk<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableChunk<?>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  DurableChunk<?> dchunk = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dchunk = genupdDurableChunk(sess, cs);
    Assert.assertNotNull(dchunk);
    writer.write(nada, mdvalue.of(dchunk));
  }
  m_checksum = cs.getValue();
  writer.close(m_tacontext);
  sess.close();
}
 
Example #13
Source File: MneMapreduceBufferDataTest.java    From mnemonic with Apache License 2.0
@Test(enabled = true)
public void testWriteBufferData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableBuffer<?>> sess =
      new MneDurableOutputSession<DurableBuffer<?>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableBuffer<?>> mdvalue =
      new MneDurableOutputValue<DurableBuffer<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableBuffer<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  DurableBuffer<?> dbuf = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dbuf = genupdDurableBuffer(sess, cs);
    Assert.assertNotNull(dbuf);
    writer.write(nada, mdvalue.of(dbuf));
  }
  m_checksum = cs.getValue();
  writer.close(m_tacontext);
  sess.close();
}
 
Example #14
Source File: NetezzaExternalTableImportJob.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected Class<? extends OutputFormat> getOutputFormatClass()
    throws ClassNotFoundException {
  if (isHCatJob) {
    return SqoopHCatUtilities.getOutputFormatClass();
  } else {
    return RawKeyTextOutputFormat.class;
  }
}
 
Example #15
Source File: TestRecovery.java    From hadoop with Apache License 2.0
private void writeBadOutput(TaskAttempt attempt, Configuration conf)
  throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, 
      TypeConverter.fromYarn(attempt.getID()));
 
  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat
      .getRecordWriter(tContext);
  
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key2, val2);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val2);
    theRecordWriter.write(nullWritable, val1);
    theRecordWriter.write(key1, nullWritable);
    theRecordWriter.write(key2, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key1, val1);
  } finally {
    theRecordWriter.close(tContext);
  }
  
  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
 
Example #16
Source File: LazyOutputFormat.java    From big-c with Apache License 2.0
/**
 * Set the underlying output format for LazyOutputFormat.
 * @param job the {@link Job} to modify
 * @param theClass the underlying class
 */
@SuppressWarnings("unchecked")
public static void setOutputFormatClass(Job job,
                                  Class<? extends OutputFormat> theClass) {
    job.setOutputFormatClass(LazyOutputFormat.class);
    job.getConfiguration().setClass(OUTPUT_FORMAT, 
        theClass, OutputFormat.class);
}
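Typical driver usage of this helper, as a minimal sketch: with LazyOutputFormat wrapping TextOutputFormat, a task creates its output file only when the first record is actually written, which avoids empty part files.

Job job = Job.getInstance(new Configuration(), "lazy-output-example");
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);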
 
Example #17
Source File: LazyOutputFormat.java    From big-c with Apache License 2.0
@SuppressWarnings("unchecked")
private void getBaseOutputFormat(Configuration conf)
    throws IOException {
  baseOut = (OutputFormat<K, V>) ReflectionUtils.newInstance(
      conf.getClass(OUTPUT_FORMAT, null), conf);
  if (baseOut == null) {
    throw new IOException("Output Format not set for LazyOutputFormat");
  }
}
 
Example #18
Source File: HadoopFormatIOWriteTest.java    From beam with Apache License 2.0
private static Configuration loadTestConfiguration(
    Class<?> outputFormatClassName, Class<?> keyClass, Class<?> valueClass) {
  Configuration conf = new Configuration();
  conf.setClass(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatClassName, OutputFormat.class);
  conf.setClass(MRJobConfig.OUTPUT_KEY_CLASS, keyClass, Object.class);
  conf.setClass(MRJobConfig.OUTPUT_VALUE_CLASS, valueClass, Object.class);
  conf.setInt(MRJobConfig.NUM_REDUCES, REDUCERS_COUNT);
  conf.set(MRJobConfig.ID, String.valueOf(1));
  return conf;
}
 
Example #19
Source File: HadoopOutputFormatTest.java    From flink with Apache License 2.0
private HadoopOutputFormat<String, Long> setupHadoopOutputFormat(
	OutputFormat<String, Long> outputFormat,
	Job job,
	RecordWriter<String, Long> recordWriter,
	OutputCommitter outputCommitter,
	Configuration configuration) {

	HadoopOutputFormat<String, Long> hadoopOutputFormat = new HadoopOutputFormat<>(outputFormat, job);
	hadoopOutputFormat.recordWriter = recordWriter;
	hadoopOutputFormat.outputCommitter = outputCommitter;
	hadoopOutputFormat.configuration = configuration;
	hadoopOutputFormat.configuration.set(MAPRED_OUTPUT_DIR_KEY, MAPRED_OUTPUT_PATH);

	return hadoopOutputFormat;
}
 
Example #20
Source File: RecordReaderWriterTest.java    From tinkerpop with Apache License 2.0
@Test
public void shouldSplitFileAndWriteProperSplits() throws Exception {
    for (int numberOfSplits = 1; numberOfSplits < 10; numberOfSplits++) {
        final File testFile = new File(HadoopGraphProvider.PATHS.get(getInputFilename()));
        logger.info("Testing: {} (splits: {})", testFile, numberOfSplits);
        final List<FileSplit> splits = generateFileSplits(testFile, numberOfSplits);
        final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass = getInputFormat();
        final Class<? extends OutputFormat<NullWritable, VertexWritable>> outputFormatClass = getOutputFormat();

        final File outputDirectory = TestHelper.makeTestDataPath(inputFormatClass, "hadoop-record-reader-writer-test");
        final Configuration config = configure(outputDirectory);
        config.addResource(this.configuration);
        validateFileSplits(splits, config, inputFormatClass, Optional.of(outputFormatClass));
    }
}
 
Example #21
Source File: TestJobOutputCommitter.java    From hadoop with Apache License 2.0
private void testKilledJob(String fileName,
    Class<? extends OutputFormat> output, String[] exclude) throws Exception {
  Path outDir = getNewOutputDir();
  Job job = MapReduceTestUtil.createKillJob(conf, outDir, inDir);
  job.setOutputFormatClass(output);

  job.submit();

  // wait for the setup to be completed
  while (job.setupProgress() != 1.0f) {
    UtilsForTests.waitFor(100);
  }

  job.killJob(); // kill the job

  assertFalse("Job did not get kill", job.waitForCompletion(true));

  if (fileName != null) {
    Path testFile = new Path(outDir, fileName);
    assertTrue("File " + testFile + " missing for job " + job.getJobID(), fs
        .exists(testFile));
  }

  // check that the files in the exclude set are absent
  for (String ex : exclude) {
    Path file = new Path(outDir, ex);
    assertFalse("File " + file + " should not be present for killed job "
        + job.getJobID(), fs.exists(file));
  }
}
 
Example #22
Source File: JobContextImpl.java    From incubator-tez with Apache License 2.0
/**
 * Get the {@link OutputFormat} class for the job.
 * 
 * @return the {@link OutputFormat} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends OutputFormat<?,?>> getOutputFormatClass() 
   throws ClassNotFoundException {
  return (Class<? extends OutputFormat<?,?>>) 
    conf.getClass(OUTPUT_FORMAT_CLASS_ATTR, TextOutputFormat.class);
}
 
Example #23
Source File: HadoopV2Task.java    From ignite with Apache License 2.0
/**
 * Setup task.
 *
 * @param outputFormat Output format.
 * @throws IOException In case of IO exception.
 * @throws InterruptedException In case of interrupt.
 */
protected void setup(@Nullable OutputFormat outputFormat) throws IOException, InterruptedException {
    if (hadoopCtx.writer() != null) {
        assert outputFormat != null;

        outputFormat.getOutputCommitter(hadoopCtx).setupTask(hadoopCtx);
    }
}
 
Example #24
Source File: JobBase.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
public JobBase(final SqoopOptions opts,
    final Class<? extends Mapper> mapperClass,
    final Class<? extends InputFormat> inputFormatClass,
    final Class<? extends OutputFormat> outputFormatClass) {

  this.options = opts;
  this.mapperClass = mapperClass;
  this.inputFormatClass = inputFormatClass;
  this.outputFormatClass = outputFormatClass;
  isHCatJob = options.getHCatTableName() != null;
}
 
Example #25
Source File: ImportJobBase.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
public ImportJobBase(final SqoopOptions opts,
    final Class<? extends Mapper> mapperClass,
    final Class<? extends InputFormat> inputFormatClass,
    final Class<? extends OutputFormat> outputFormatClass,
    final ImportJobContext context) {
  super(opts, mapperClass, inputFormatClass, outputFormatClass, context);
}
 
Example #26
Source File: TransformBaseRunner.java    From BigDataPlatform with GNU General Public License v3.0
public void setupRunner(String jobName, Class<?> runnerClass,
    Class<? extends TableMapper<?, ?>> mapperClass, Class<? extends Reducer<?, ?, ?, ?>> reducerClass,
    Class<? extends WritableComparable<?>> outputKeyClass,
    Class<? extends Writable> outputValueClass,
    Class<? extends OutputFormat<?, ?>> outputFormatClass) {
    this.setupRunner(jobName, runnerClass, mapperClass, reducerClass,
        outputKeyClass, outputValueClass, outputKeyClass, outputValueClass,
        outputFormatClass);
}
 
Example #27
Source File: TestRecovery.java    From big-c with Apache License 2.0
private void writeOutput(TaskAttempt attempt, Configuration conf)
  throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, 
      TypeConverter.fromYarn(attempt.getID()));
  
  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat
      .getRecordWriter(tContext);
  
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(tContext);
  }
  
  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
 
Example #28
Source File: MultiOutputFormat.java    From elasticsearch-hadoop with Apache License 2.0
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
    List<org.apache.hadoop.mapred.OutputFormat> formats = getOldApiFormats(job);
    for (org.apache.hadoop.mapred.OutputFormat format : formats) {
        format.checkOutputSpecs(ignored, job);
    }
}
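For contrast with the old-API (org.apache.hadoop.mapred) signature above, here is a hedged sketch of the equivalent check against the new API this page covers; getNewApiFormats is a made-up helper mirroring getOldApiFormats.

@Override
public void checkOutputSpecs(JobContext context)
        throws IOException, InterruptedException {
    for (OutputFormat<?, ?> format : getNewApiFormats(context.getConfiguration())) {
        format.checkOutputSpecs(context);
    }
}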
 
Example #29
Source File: TestRecovery.java    From big-c with Apache License 2.0
private void writeBadOutput(TaskAttempt attempt, Configuration conf)
  throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, 
      TypeConverter.fromYarn(attempt.getID()));
 
  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat
      .getRecordWriter(tContext);
  
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key2, val2);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val2);
    theRecordWriter.write(nullWritable, val1);
    theRecordWriter.write(key1, nullWritable);
    theRecordWriter.write(key2, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key1, val1);
  } finally {
    theRecordWriter.close(tContext);
  }
  
  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
 
Example #30
Source File: InjectableConnManager.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
/**
 * Allow the user to inject custom mapper, input, and output formats
 * into the importTable() process.
 */
@Override
@SuppressWarnings("unchecked")
public void importTable(ImportJobContext context)
    throws IOException, ImportException {

  SqoopOptions options = context.getOptions();
  Configuration conf = options.getConf();

  Class<? extends Mapper> mapperClass = (Class<? extends Mapper>)
      conf.getClass(MAPPER_KEY, Mapper.class);
  Class<? extends InputFormat> ifClass = (Class<? extends InputFormat>)
      conf.getClass(INPUT_FORMAT_KEY, TextInputFormat.class);
  Class<? extends OutputFormat> ofClass = (Class<? extends OutputFormat>)
      conf.getClass(OUTPUT_FORMAT_KEY, TextOutputFormat.class);

  Class<? extends ImportJobBase> jobClass = (Class<? extends ImportJobBase>)
      conf.getClass(IMPORT_JOB_KEY, ImportJobBase.class);

  String tableName = context.getTableName();

  // Instantiate the user's chosen ImportJobBase instance.
  ImportJobBase importJob = ReflectionUtils.newInstance(jobClass, conf);

  // And configure the dependencies to inject
  importJob.setOptions(options);
  importJob.setMapperClass(mapperClass);
  importJob.setInputFormatClass(ifClass);
  importJob.setOutputFormatClass(ofClass);

  importJob.runImport(tableName, context.getJarFile(),
      getSplitColumn(options, tableName), conf);
}
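Wiring up the injection is just a matter of setting the class-name keys before importTable() runs. A hedged snippet, assuming the *_KEY constants used above are accessible to the caller:

// Route import output through SequenceFileOutputFormat instead of the default.
Configuration conf = options.getConf();
conf.setClass(InjectableConnManager.OUTPUT_FORMAT_KEY,
    SequenceFileOutputFormat.class, OutputFormat.class);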